summaryrefslogtreecommitdiff
path: root/storage/rocksdb
diff options
context:
space:
mode:
authorSergei Petrunia <psergey@askmonty.org>2018-01-27 11:52:34 +0000
committerSergei Petrunia <psergey@askmonty.org>2018-01-27 11:52:34 +0000
commite3a03da2bcaed43a06b49b4d22f462804503ec14 (patch)
tree8f21d9fff7cfb0cb205f1116a8d41f11838d7fac /storage/rocksdb
parent2da191791260d5ca3adcb6857404912d52926ee2 (diff)
parent445e518bc7aa5f4bb48d98e798905c9c0b0ce673 (diff)
downloadmariadb-git-e3a03da2bcaed43a06b49b4d22f462804503ec14.tar.gz
Merge from merge-myrocks:
commit 445e518bc7aa5f4bb48d98e798905c9c0b0ce673 Author: Sergei Petrunia <psergey@askmonty.org> Date: Sat Jan 27 10:18:20 2018 +0000 Copy of commit f8f364b47f2784f16b401f27658f1c16eaf348ec Author: Jay Edgar <jkedgar@fb.com> Date: Tue Oct 17 15:19:31 2017 -0700 Add a hashed, hierarchical, wheel timer implementation Summary: In order to implement idle timeouts on detached sessions we need something inside MySQL that is lightweight and can handle calling events in the future wi By default the timers are grouped into 10ms buckets (the 'hashed' part), though the size of the buckets is configurable at the creation of the timer. Eac Reviewed By: djwatson Differential Revision: D6199806 fbshipit-source-id: 5e1590f
Diffstat (limited to 'storage/rocksdb')
-rw-r--r--storage/rocksdb/CMakeLists.txt4
-rw-r--r--storage/rocksdb/build_rocksdb.cmake4
-rw-r--r--storage/rocksdb/event_listener.cc10
-rw-r--r--storage/rocksdb/event_listener.h3
-rw-r--r--storage/rocksdb/ha_rocksdb.cc1869
-rw-r--r--storage/rocksdb/ha_rocksdb.h102
-rwxr-xr-xstorage/rocksdb/myrocks_hotbackup686
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc150
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc (renamed from storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc)12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc143
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result113
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result113
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result107
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result79
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result42
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result58
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result117
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/cardinality.result36
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result67
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result172
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue255.result47
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result42
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/perf_context.result32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result89
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/show_engine.result36
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result28
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/write_sync.result5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test118
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test94
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test133
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cardinality.test42
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test158
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue255.test32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test63
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/write_sync.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result84
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result58
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result)8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result84
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test)2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test18
-rw-r--r--storage/rocksdb/properties_collector.cc145
-rw-r--r--storage/rocksdb/properties_collector.h41
-rw-r--r--storage/rocksdb/rdb_cf_options.cc8
-rw-r--r--storage/rocksdb/rdb_cf_options.h3
-rw-r--r--storage/rocksdb/rdb_compact_filter.h2
-rw-r--r--storage/rocksdb/rdb_datadic.cc196
-rw-r--r--storage/rocksdb/rdb_datadic.h145
-rw-r--r--storage/rocksdb/rdb_i_s.cc149
-rw-r--r--storage/rocksdb/rdb_i_s.h1
-rw-r--r--storage/rocksdb/rdb_io_watchdog.cc2
-rw-r--r--storage/rocksdb/rdb_perf_context.cc31
-rw-r--r--storage/rocksdb/rdb_perf_context.h14
-rw-r--r--storage/rocksdb/rdb_psi.cc3
-rw-r--r--storage/rocksdb/rdb_psi.h3
-rw-r--r--storage/rocksdb/rdb_sst_info.cc38
-rw-r--r--storage/rocksdb/rdb_sst_info.h6
-rw-r--r--storage/rocksdb/rdb_utils.cc26
-rw-r--r--storage/rocksdb/rdb_utils.h12
m---------storage/rocksdb/rocksdb0
133 files changed, 5562 insertions, 1220 deletions
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
index 6cb7eb1d439..d5e9b1914f0 100644
--- a/storage/rocksdb/CMakeLists.txt
+++ b/storage/rocksdb/CMakeLists.txt
@@ -178,9 +178,9 @@ IF(HAVE_SCHED_GETCPU)
ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1)
ENDIF()
-IF (NOT "$ENV{WITH_TBB}" STREQUAL "")
+IF (WITH_TBB)
SET(rocksdb_static_libs ${rocksdb_static_libs}
- $ENV{WITH_TBB}/libtbb${PIC_EXT}.a)
+ ${WITH_TBB}/lib/libtbb${PIC_EXT}.a)
ADD_DEFINITIONS(-DTBB)
ENDIF()
diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake
index 5810412f566..b9b70593166 100644
--- a/storage/rocksdb/build_rocksdb.cmake
+++ b/storage/rocksdb/build_rocksdb.cmake
@@ -245,6 +245,7 @@ set(ROCKSDB_SOURCES
table/block_based_table_factory.cc
table/block_based_table_reader.cc
table/block_builder.cc
+ table/block_fetcher.cc
table/block_prefix_index.cc
table/bloom_block.cc
table/cuckoo_table_builder.cc
@@ -340,15 +341,16 @@ set(ROCKSDB_SOURCES
utilities/transactions/optimistic_transaction_db_impl.cc
utilities/transactions/pessimistic_transaction.cc
utilities/transactions/pessimistic_transaction_db.cc
+ utilities/transactions/snapshot_checker.cc
utilities/transactions/transaction_base.cc
utilities/transactions/transaction_db_mutex_impl.cc
utilities/transactions/transaction_lock_mgr.cc
utilities/transactions/transaction_util.cc
utilities/transactions/write_prepared_txn.cc
+ utilities/transactions/write_prepared_txn_db.cc
utilities/ttl/db_ttl_impl.cc
utilities/write_batch_with_index/write_batch_with_index.cc
utilities/write_batch_with_index/write_batch_with_index_internal.cc
-
)
if(WIN32)
diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc
index 5854da54210..1a621b5df4b 100644
--- a/storage/rocksdb/event_listener.cc
+++ b/storage/rocksdb/event_listener.cc
@@ -83,4 +83,14 @@ void Rdb_event_listener::OnExternalFileIngested(
DBUG_ASSERT(db != nullptr);
update_index_stats(info.table_properties);
}
+
+void Rdb_event_listener::OnBackgroundError(
+ rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) {
+ rdb_log_status_error(*status, "Error detected in background");
+ sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason);
+ if (status->IsCorruption()) {
+ rdb_persist_corruption_marker();
+ abort();
+ }
+}
} // namespace myrocks
diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h
index d535031644b..8772105de36 100644
--- a/storage/rocksdb/event_listener.h
+++ b/storage/rocksdb/event_listener.h
@@ -37,6 +37,9 @@ public:
rocksdb::DB *db,
const rocksdb::ExternalFileIngestionInfo &ingestion_info) override;
+ void OnBackgroundError(rocksdb::BackgroundErrorReason reason,
+ rocksdb::Status *status) override;
+
private:
Rdb_ddl_manager *m_ddl_manager;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 4a0a3eddb1a..aa1e08fcf50 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -144,10 +144,6 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton,
my_core::TABLE_SHARE *table_arg,
my_core::MEM_ROOT *mem_root);
-bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
- const rocksdb::Slice &eq_cond,
- const bool use_all_keys);
-
static rocksdb::CompactRangeOptions getCompactRangeOptions() {
rocksdb::CompactRangeOptions compact_range_options;
compact_range_options.bottommost_level_compaction =
@@ -423,24 +419,37 @@ static void rocksdb_set_collation_exception_list(THD *thd,
void *var_ptr,
const void *save);
-void rocksdb_set_update_cf_options(THD *thd,
- struct st_mysql_sys_var *var,
- void *var_ptr,
- const void *save);
+static int rocksdb_validate_update_cf_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *save,
+ st_mysql_value *value);
-static void
-rocksdb_set_bulk_load(THD *thd,
- struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
- void *var_ptr, const void *save);
+static void rocksdb_set_update_cf_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
+static int rocksdb_check_bulk_load(THD *const thd,
+ struct st_mysql_sys_var *var
+ MY_ATTRIBUTE((__unused__)),
+ void *save,
+ struct st_mysql_value *value);
-static void rocksdb_set_bulk_load_allow_unsorted(
- THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
- void *var_ptr, const void *save);
+static int rocksdb_check_bulk_load_allow_unsorted(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value);
static void rocksdb_set_max_background_jobs(THD *thd,
struct st_mysql_sys_var *const var,
void *const var_ptr,
const void *const save);
+static void rocksdb_set_bytes_per_sync(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
+static void rocksdb_set_wal_bytes_per_sync(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
//////////////////////////////////////////////////////////////////////////////
// Options definitions
//////////////////////////////////////////////////////////////////////////////
@@ -471,6 +480,7 @@ static char *rocksdb_compact_cf_name;
static char *rocksdb_checkpoint_name;
static my_bool rocksdb_signal_drop_index_thread;
static my_bool rocksdb_strict_collation_check = 1;
+static my_bool rocksdb_ignore_unknown_options = 1;
static my_bool rocksdb_enable_2pc = 0;
static char *rocksdb_strict_collation_exceptions;
static my_bool rocksdb_collect_sst_properties = 1;
@@ -484,7 +494,6 @@ static int rocksdb_debug_ttl_read_filter_ts = 0;
static my_bool rocksdb_debug_ttl_ignore_pk = 0;
static my_bool rocksdb_reset_stats = 0;
static uint32_t rocksdb_io_write_timeout_secs = 0;
-static uint64_t rocksdb_number_stat_computes = 0;
static uint32_t rocksdb_seconds_between_stat_computes = 3600;
static long long rocksdb_compaction_sequential_deletes = 0l;
static long long rocksdb_compaction_sequential_deletes_window = 0l;
@@ -495,11 +504,14 @@ static uint32_t rocksdb_table_stats_sampling_pct;
static my_bool rocksdb_enable_bulk_load_api = 1;
static my_bool rocksdb_print_snapshot_conflict_queries = 0;
static my_bool rocksdb_large_prefix = 0;
+static my_bool rocksdb_allow_to_start_after_corruption = 0;
static char* rocksdb_git_hash;
char *compression_types_val=
const_cast<char*>(get_rocksdb_supported_compression_types());
+std::atomic<uint64_t> rocksdb_row_lock_deadlocks(0);
+std::atomic<uint64_t> rocksdb_row_lock_wait_timeouts(0);
std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0);
std::atomic<uint64_t> rocksdb_wal_group_syncs(0);
@@ -510,8 +522,9 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
o->listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager));
o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;
+ o->max_open_files = -2; // auto-tune to 50% open_files_limit
- o->concurrent_prepare = true;
+ o->two_write_queues = true;
o->manual_wal_flush = true;
return o;
}
@@ -593,6 +606,33 @@ static void rocksdb_set_io_write_timeout(
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
+enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
+ FLUSH_LOG_NEVER = 0,
+ FLUSH_LOG_SYNC,
+ FLUSH_LOG_BACKGROUND,
+ FLUSH_LOG_MAX /* must be last */
+};
+
+static int rocksdb_validate_flush_log_at_trx_commit(
+ THD *const thd,
+ struct st_mysql_sys_var *const var, /* in: pointer to system variable */
+ void *var_ptr, /* out: immediate result for update function */
+ struct st_mysql_value *const value /* in: incoming value */) {
+ long long new_value;
+
+ /* value is NULL */
+ if (value->val_int(value, &new_value)) {
+ return HA_EXIT_FAILURE;
+ }
+
+ if (rocksdb_db_options->allow_mmap_writes && new_value != FLUSH_LOG_NEVER) {
+ return HA_EXIT_FAILURE;
+ }
+
+ *static_cast<uint32_t *>(var_ptr) = static_cast<uint32_t>(new_value);
+ return HA_EXIT_SUCCESS;
+}
+
static const char *index_type_names[] = {"kBinarySearch", "kHashSearch", NullS};
static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1,
@@ -600,7 +640,7 @@ static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1,
nullptr};
const ulong RDB_MAX_LOCK_WAIT_SECONDS = 1024 * 1024 * 1024;
-const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024 * 1024;
+const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024;
const ulong RDB_DEFAULT_BULK_LOAD_SIZE = 1000;
const ulong RDB_MAX_BULK_LOAD_SIZE = 1024 * 1024 * 1024;
const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024;
@@ -640,12 +680,13 @@ static MYSQL_THDVAR_BOOL(
bulk_load, PLUGIN_VAR_RQCMDARG,
"Use bulk-load mode for inserts. This disables "
"unique_checks and enables rocksdb_commit_in_the_middle.",
- nullptr, rocksdb_set_bulk_load, FALSE);
+ rocksdb_check_bulk_load, nullptr, FALSE);
static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG,
"Allow unsorted input during bulk-load. "
"Can be changed only when bulk load is disabled.",
- nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE);
+ rocksdb_check_bulk_load_allow_unsorted, nullptr,
+ FALSE);
static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -751,11 +792,11 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_db_options->create_if_missing);
static MYSQL_SYSVAR_BOOL(
- concurrent_prepare,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options->concurrent_prepare),
+ two_write_queues,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->two_write_queues),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr,
- rocksdb_db_options->concurrent_prepare);
+ "DBOptions::two_write_queues for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->two_write_queues);
static MYSQL_SYSVAR_BOOL(
manual_wal_flush,
@@ -882,7 +923,7 @@ static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options->max_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::max_open_files for RocksDB", nullptr,
nullptr, rocksdb_db_options->max_open_files,
- /* min */ -1, /* max */ INT_MAX, 0);
+ /* min */ -2, /* max */ INT_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(max_total_wal_size,
rocksdb_db_options->max_total_wal_size,
@@ -1063,16 +1104,18 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_db_options->use_adaptive_mutex);
static MYSQL_SYSVAR_UINT64_T(bytes_per_sync, rocksdb_db_options->bytes_per_sync,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG,
"DBOptions::bytes_per_sync for RocksDB", nullptr,
- nullptr, rocksdb_db_options->bytes_per_sync,
+ rocksdb_set_bytes_per_sync,
+ rocksdb_db_options->bytes_per_sync,
/* min */ 0L, /* max */ ULONGLONG_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(wal_bytes_per_sync,
rocksdb_db_options->wal_bytes_per_sync,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ PLUGIN_VAR_RQCMDARG,
"DBOptions::wal_bytes_per_sync for RocksDB", nullptr,
- nullptr, rocksdb_db_options->wal_bytes_per_sync,
+ rocksdb_set_wal_bytes_per_sync,
+ rocksdb_db_options->wal_bytes_per_sync,
/* min */ 0L, /* max */ ULONGLONG_MAX, 0);
static MYSQL_SYSVAR_BOOL(
@@ -1190,22 +1233,17 @@ static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options,
static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC
/* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/,
- "Option updates per column family for RocksDB", nullptr,
+ "Option updates per column family for RocksDB",
+ rocksdb_validate_update_cf_options,
rocksdb_set_update_cf_options, nullptr);
-enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
- FLUSH_LOG_NEVER = 0,
- FLUSH_LOG_SYNC,
- FLUSH_LOG_BACKGROUND,
- FLUSH_LOG_MAX /* must be last */
-};
-
static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit,
rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
"Sync on transaction commit. Similar to "
"innodb_flush_log_at_trx_commit. 1: sync on commit, "
"0,2: not sync on commit",
- nullptr, nullptr, /* default */ FLUSH_LOG_SYNC,
+ rocksdb_validate_flush_log_at_trx_commit, nullptr,
+ /* default */ FLUSH_LOG_SYNC,
/* min */ FLUSH_LOG_NEVER,
/* max */ FLUSH_LOG_BACKGROUND, 0);
@@ -1346,6 +1384,11 @@ static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG,
"Enable two phase commit for MyRocks", nullptr,
nullptr, TRUE);
+static MYSQL_SYSVAR_BOOL(ignore_unknown_options, rocksdb_ignore_unknown_options,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Enable ignoring unknown options passed to RocksDB",
+ nullptr, nullptr, TRUE);
+
static MYSQL_SYSVAR_BOOL(strict_collation_check, rocksdb_strict_collation_check,
PLUGIN_VAR_RQCMDARG,
"Enforce case sensitive collation for MyRocks indexes",
@@ -1377,11 +1420,6 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_force_flush_memtable_and_lzero_now,
rocksdb_force_flush_memtable_and_lzero_now_stub, FALSE);
-static MYSQL_THDVAR_BOOL(
- flush_memtable_on_analyze, PLUGIN_VAR_RQCMDARG,
- "Forces memtable flush on ANALZYE table to get accurate cardinality",
- nullptr, nullptr, true);
-
static MYSQL_SYSVAR_UINT(
seconds_between_stat_computes, rocksdb_seconds_between_stat_computes,
PLUGIN_VAR_RQCMDARG,
@@ -1498,6 +1536,13 @@ static MYSQL_SYSVAR_BOOL(
"index prefix length is 767.",
nullptr, nullptr, FALSE);
+static MYSQL_SYSVAR_BOOL(
+ allow_to_start_after_corruption, rocksdb_allow_to_start_after_corruption,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Allow server still to start successfully even if RocksDB corruption is "
+ "detected.",
+ nullptr, nullptr, FALSE);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -1523,7 +1568,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(skip_bloom_filter_on_read),
MYSQL_SYSVAR(create_if_missing),
- MYSQL_SYSVAR(concurrent_prepare),
+ MYSQL_SYSVAR(two_write_queues),
MYSQL_SYSVAR(manual_wal_flush),
MYSQL_SYSVAR(create_missing_column_families),
MYSQL_SYSVAR(error_if_exists),
@@ -1605,6 +1650,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(signal_drop_index_thread),
MYSQL_SYSVAR(pause_background_work),
MYSQL_SYSVAR(enable_2pc),
+ MYSQL_SYSVAR(ignore_unknown_options),
MYSQL_SYSVAR(strict_collation_check),
MYSQL_SYSVAR(strict_collation_exceptions),
MYSQL_SYSVAR(collect_sst_properties),
@@ -1618,7 +1664,6 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(debug_ttl_ignore_pk),
MYSQL_SYSVAR(reset_stats),
MYSQL_SYSVAR(io_write_timeout),
- MYSQL_SYSVAR(flush_memtable_on_analyze),
MYSQL_SYSVAR(seconds_between_stat_computes),
MYSQL_SYSVAR(compaction_sequential_deletes),
@@ -1640,6 +1685,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(table_stats_sampling_pct),
MYSQL_SYSVAR(large_prefix),
+ MYSQL_SYSVAR(allow_to_start_after_corruption),
MYSQL_SYSVAR(git_hash),
nullptr};
@@ -1768,6 +1814,7 @@ protected:
ulonglong m_update_count = 0;
ulonglong m_delete_count = 0;
ulonglong m_lock_count = 0;
+ std::unordered_map<GL_INDEX_ID, ulonglong> m_auto_incr_map;
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
@@ -1800,7 +1847,30 @@ protected:
get_iterator(const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family) = 0;
-public:
+ /*
+ @detail
+ This function takes in the WriteBatch of the transaction to add
+ all the AUTO_INCREMENT merges. It does so by iterating through
+ m_auto_incr_map and then constructing key/value pairs to call merge upon.
+
+ @param wb write batch that the auto-increment entries are added to
+ */
+ rocksdb::Status merge_auto_incr_map(rocksdb::WriteBatchBase *const wb) {
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", return rocksdb::Status::OK(););
+
+ // Iterate through the merge map merging all keys into data dictionary.
+ rocksdb::Status s;
+ for (auto &it : m_auto_incr_map) {
+ s = dict_manager.put_auto_incr_val(wb, it.first, it.second);
+ if (!s.ok()) {
+ return s;
+ }
+ }
+ m_auto_incr_map.clear();
+ return s;
+ }
+
+ public:
const char *m_mysql_log_file_name;
my_off_t m_mysql_log_offset;
#ifdef MARIAROCKS_NOT_YET
@@ -1869,6 +1939,7 @@ public:
m_detailed_error.copy(timeout_message(
"index", tbl_def->full_tablename().c_str(), kd.get_name().c_str()));
table_handler->m_lock_wait_timeout_counter.inc();
+ rocksdb_row_lock_wait_timeouts++;
return HA_ERR_LOCK_WAIT_TIMEOUT;
}
@@ -1878,6 +1949,7 @@ public:
false /* just statement */);
m_detailed_error = String();
table_handler->m_deadlock_counter.inc();
+ rocksdb_row_lock_deadlocks++;
return HA_ERR_LOCK_DEADLOCK;
} else if (s.IsBusy()) {
rocksdb_snapshot_conflict_errors++;
@@ -2031,28 +2103,110 @@ public:
bool has_snapshot() const { return m_read_opts.snapshot != nullptr; }
private:
- // The tables we are currently loading. In a partitioned table this can
- // have more than one entry
- std::vector<ha_rocksdb *> m_curr_bulk_load;
+ // The Rdb_sst_info structures we are currently loading. In a partitioned
+ // table this can have more than one entry
+ std::vector<std::shared_ptr<Rdb_sst_info>> m_curr_bulk_load;
+ std::string m_curr_bulk_load_tablename;
+
+ /* External merge sorts for bulk load: key ID -> merge sort instance */
+ std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
public:
- int finish_bulk_load() {
- int rc = 0;
+ int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf,
+ Rdb_index_merge **key_merge) {
+ int res;
+ auto it = m_key_merge.find(kd_gl_id);
+ if (it == m_key_merge.end()) {
+ m_key_merge.emplace(
+ std::piecewise_construct, std::make_tuple(kd_gl_id),
+ std::make_tuple(
+ get_rocksdb_tmpdir(), THDVAR(get_thd(), merge_buf_size),
+ THDVAR(get_thd(), merge_combine_read_size),
+ THDVAR(get_thd(), merge_tmp_file_removal_delay_ms), cf));
+ it = m_key_merge.find(kd_gl_id);
+ if ((res = it->second.init()) != 0) {
+ return res;
+ }
+ }
+ *key_merge = &it->second;
+ return HA_EXIT_SUCCESS;
+ }
+
+ int finish_bulk_load(int print_client_error = true) {
+ int rc = 0, rc2;
- std::vector<ha_rocksdb *>::iterator it;
- while ((it = m_curr_bulk_load.begin()) != m_curr_bulk_load.end()) {
- int rc2 = (*it)->finalize_bulk_load();
+ std::vector<std::shared_ptr<Rdb_sst_info>>::iterator it;
+ for (it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); it++) {
+ rc2 = (*it)->commit(print_client_error);
if (rc2 != 0 && rc == 0) {
rc = rc2;
}
}
-
+ m_curr_bulk_load.clear();
+ m_curr_bulk_load_tablename.clear();
DBUG_ASSERT(m_curr_bulk_load.size() == 0);
+ // Flush the index_merge sort buffers
+ if (!m_key_merge.empty()) {
+ rocksdb::Slice merge_key;
+ rocksdb::Slice merge_val;
+ for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
+ GL_INDEX_ID index_id = it->first;
+ std::shared_ptr<const Rdb_key_def> keydef =
+ ddl_manager.safe_find(index_id);
+ std::string table_name = ddl_manager.safe_get_table_name(index_id);
+
+ // Unable to find key definition or table name since the
+ // table could have been dropped.
+ // TODO(herman): there is a race here between dropping the table
+ // and detecting a drop here. If the table is dropped while bulk
+ // loading is finishing, these keys being added here may
+ // be missed by the compaction filter and not be marked for
+ // removal. It is unclear how to lock the sql table from the storage
+ // engine to prevent modifications to it while bulk load is occurring.
+ if (keydef == nullptr || table_name.empty()) {
+ rc2 = HA_ERR_ROCKSDB_BULK_LOAD;
+ break;
+ }
+ const std::string &index_name = keydef->get_name();
+ Rdb_index_merge &rdb_merge = it->second;
+
+ // Rdb_sst_info expects a denormalized table name in the form of
+ // "./database/table"
+ std::replace(table_name.begin(), table_name.end(), '.', '/');
+ table_name = "./" + table_name;
+ Rdb_sst_info sst_info(rdb, table_name, index_name, rdb_merge.get_cf(),
+ *rocksdb_db_options,
+ THDVAR(get_thd(), trace_sst_api));
+
+ while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) {
+ if ((rc2 = sst_info.put(merge_key, merge_val)) != 0) {
+ break;
+ }
+ }
+
+ // rc2 == -1 => finished ok; rc2 > 0 => error
+ if (rc2 > 0 || (rc2 = sst_info.commit(print_client_error)) != 0) {
+ if (rc == 0) {
+ rc = rc2;
+ }
+ break;
+ }
+ }
+ m_key_merge.clear();
+
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this
+ point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+ }
return rc;
}
- void start_bulk_load(ha_rocksdb *const bulk_load) {
+ int start_bulk_load(ha_rocksdb *const bulk_load,
+ std::shared_ptr<Rdb_sst_info> sst_info) {
/*
If we already have an open bulk load of a table and the name doesn't
match the current one, close out the currently running one. This allows
@@ -2062,29 +2216,46 @@ public:
DBUG_ASSERT(bulk_load != nullptr);
if (!m_curr_bulk_load.empty() &&
- !bulk_load->same_table(*m_curr_bulk_load[0])) {
+ bulk_load->get_table_basename() != m_curr_bulk_load_tablename) {
const auto res = finish_bulk_load();
- SHIP_ASSERT(res == 0);
- }
-
- m_curr_bulk_load.push_back(bulk_load);
- }
-
- void end_bulk_load(ha_rocksdb *const bulk_load) {
- for (auto it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end();
- it++) {
- if (*it == bulk_load) {
- m_curr_bulk_load.erase(it);
- return;
+ if (res != HA_EXIT_SUCCESS) {
+ m_curr_bulk_load.clear();
+ m_curr_bulk_load_tablename.clear();
+ return res;
}
}
- // Should not reach here
- SHIP_ASSERT(0);
+ /*
+ This used to track ha_rocksdb handler objects, but those can be
+ freed by the table cache while this was referencing them. Instead
+ of tracking ha_rocksdb handler objects, this now tracks the
+ Rdb_sst_info allocated, and the ha_rocksdb handler and the
+ Rdb_transaction both hold shared pointers to them.
+
+ On transaction complete, it will commit each Rdb_sst_info structure found.
+ If the ha_rocksdb object is freed, etc., it will also commit
+ the Rdb_sst_info. The Rdb_sst_info commit path needs to be idempotent.
+ */
+ m_curr_bulk_load.push_back(sst_info);
+ m_curr_bulk_load_tablename = bulk_load->get_table_basename();
+ return HA_EXIT_SUCCESS;
}
int num_ongoing_bulk_load() const { return m_curr_bulk_load.size(); }
+ /*
+   Session value of the rocksdb tmpdir variable, with an empty string
+   normalized to nullptr. In that case DDL operations will use the default
+   --tmpdir passed to mysql instead.
+ */
+ const char *get_rocksdb_tmpdir() const {
+   const char *const configured_dir = THDVAR(get_thd(), tmpdir);
+   const bool is_blank = configured_dir != nullptr && *configured_dir == '\0';
+   return is_blank ? nullptr : configured_dir;
+ }
+
/*
Flush the data accumulated so far. This assumes we're doing a bulk insert.
@@ -2111,6 +2282,20 @@ public:
return false;
}
+ /*
+   Remember curr_id for gl_index_id unless a larger value is already
+   recorded. An index that has no entry yet gets one created by the lookup.
+ */
+ void set_auto_incr(const GL_INDEX_ID &gl_index_id, ulonglong curr_id) {
+   auto &recorded = m_auto_incr_map[gl_index_id];
+   recorded = std::max(recorded, curr_id);
+ }
+
+#ifndef NDEBUG
+ /*
+   Debug-only lookup of the value recorded by set_auto_incr() for
+   gl_index_id. Returns 0 when nothing is recorded; never adds an entry.
+ */
+ ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) {
+   const auto entry = m_auto_incr_map.find(gl_index_id);
+   return entry == m_auto_incr_map.end() ? 0 : entry->second;
+ }
+#endif
+
virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
const rocksdb::Slice &value) = 0;
@@ -2134,15 +2319,17 @@ public:
virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
- std::string *value) const = 0;
+ rocksdb::PinnableSlice *const value) const = 0;
virtual rocksdb::Status
get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive) = 0;
rocksdb::Iterator *
get_iterator(rocksdb::ColumnFamilyHandle *const column_family,
bool skip_bloom_filter, bool fill_cache,
+ const rocksdb::Slice &eq_cond_lower_bound,
+ const rocksdb::Slice &eq_cond_upper_bound,
bool read_current = false, bool create_snapshot = true) {
// Make sure we are not doing both read_current (which implies we don't
// want a snapshot) and create_snapshot which makes sure we create
@@ -2157,6 +2344,8 @@ public:
if (skip_bloom_filter) {
options.total_order_seek = true;
+ options.iterate_lower_bound = &eq_cond_lower_bound;
+ options.iterate_upper_bound = &eq_cond_upper_bound;
} else {
// With this option, Iterator::Valid() returns false if key
// is outside of the prefix bloom filter range set at Seek().
@@ -2269,6 +2458,12 @@ private:
return false;
}
+ s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ return false;
+ }
+
s = m_rocksdb_tx->Prepare();
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
@@ -2279,13 +2474,24 @@ private:
bool commit_no_binlog() override {
bool res = false;
+ rocksdb::Status s;
+
+ s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+
release_snapshot();
- const rocksdb::Status s = m_rocksdb_tx->Commit();
+ s = m_rocksdb_tx->Commit();
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
res = true;
+ goto error;
}
+error:
/* Save the transaction object to be reused */
release_tx();
@@ -2306,6 +2512,7 @@ public:
m_update_count = 0;
m_delete_count = 0;
m_lock_count = 0;
+ m_auto_incr_map.clear();
m_ddl_transaction = false;
if (m_rocksdb_tx) {
release_snapshot();
@@ -2407,18 +2614,25 @@ public:
rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
- std::string *value) const override {
+ rocksdb::PinnableSlice *const value) const override {
+ // Clean the PinnableSlice right before Get(), since there can be multiple
+ // gets per statement; the resources held after the last Get in a statement
+ // are cleared in the handler::reset call.
+ value->Reset();
global_stats.queries[QUERIES_POINT].inc();
return m_rocksdb_tx->Get(m_read_opts, column_family, key, value);
}
rocksdb::Status
get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive) override {
if (++m_lock_count > m_max_row_locks)
return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
+ if (value != nullptr) {
+ value->Reset();
+ }
return m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value,
exclusive);
}
@@ -2546,13 +2760,24 @@ private:
bool commit_no_binlog() override {
bool res = false;
+ rocksdb::Status s;
+
+ s = merge_auto_incr_map(m_batch->GetWriteBatch());
+ if (!s.ok()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
+ res = true;
+ goto error;
+ }
+
release_snapshot();
- const rocksdb::Status s =
- rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch());
+
+ s = rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch());
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT);
res = true;
+ goto error;
}
+error:
reset();
m_write_count = 0;
@@ -2641,14 +2866,15 @@ public:
rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
- std::string *const value) const override {
+ rocksdb::PinnableSlice *const value) const override {
+ value->Reset();
return m_batch->GetFromBatchAndDB(rdb, m_read_opts, column_family, key,
value);
}
rocksdb::Status
get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive) override {
return get(column_family, key, value);
}
@@ -2777,13 +3003,12 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) {
static int rocksdb_close_connection(handlerton *const hton, THD *const thd) {
Rdb_transaction *&tx = get_tx_from_thd(thd);
if (tx != nullptr) {
- int rc = tx->finish_bulk_load();
+ int rc = tx->finish_bulk_load(false);
if (rc != 0) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Error %d finalizing last SST file while "
"disconnecting",
rc);
- abort_with_stack_traces();
}
delete tx;
@@ -2832,7 +3057,8 @@ static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__)))
/*
target_lsn is set to 0 when MySQL wants to sync the wal files
*/
- if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ if ((target_lsn == 0 && !rocksdb_db_options->allow_mmap_writes) ||
+ rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
rocksdb_wal_group_syncs++;
s = rdb->FlushWAL(target_lsn == 0 ||
rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
@@ -3308,79 +3534,54 @@ private:
"=========================================\n";
}
- static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn,
- const GL_INDEX_ID &gl_index_id,
- bool is_last_path = false) {
- std::string txn_data;
+ /*
+ Builds the Rdb_dl_trx_info record for one transaction of a deadlock path:
+ transaction id, table name, index name, column family name, hex dump of
+ the waiting key and whether the lock is exclusive.
+
+ Names that cannot be resolved are replaced by a "NOT FOUND; ..." string
+ carrying the id that was looked up, so the caller always gets printable
+ text.
+ */
+ static Rdb_deadlock_info::Rdb_dl_trx_info
+ get_dl_txn_info(const rocksdb::DeadlockInfo &txn,
+ const GL_INDEX_ID &gl_index_id) {
+ Rdb_deadlock_info::Rdb_dl_trx_info txn_data;
+
+ txn_data.trx_id = txn.m_txn_id;
- /* extract table name and index names using the index id */
- std::string table_name = ddl_manager.safe_get_table_name(gl_index_id);
- if (table_name.empty()) {
- table_name =
+ txn_data.table_name = ddl_manager.safe_get_table_name(gl_index_id);
+ if (txn_data.table_name.empty()) {
+ txn_data.table_name =
"NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
}
+
auto kd = ddl_manager.safe_find(gl_index_id);
- std::string idx_name =
+ txn_data.index_name =
(kd) ? kd->get_name()
: "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
- /* get the name of the column family */
rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id);
- std::string cf_name = cfh->GetName();
-
- txn_data += format_string(
- "TRANSACTIONID: %u\n"
- "COLUMN FAMILY NAME: %s\n"
- "WAITING KEY: %s\n"
- "LOCK TYPE: %s\n"
- "INDEX NAME: %s\n"
- "TABLE NAME: %s\n",
- txn.m_txn_id, cf_name.c_str(),
- rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length())
- .c_str(),
- txn.m_exclusive ? "EXCLUSIVE" : "SHARED", idx_name.c_str(),
- table_name.c_str());
- if (!is_last_path) {
- txn_data += "---------------WAITING FOR---------------\n";
- }
+ // NOTE(review): get_cf() presumably returns nullptr for an id it does not
+ // know (e.g. a column family dropped after the deadlock was recorded), so
+ // guard the handle the same way the key definition lookup above is guarded.
+ txn_data.cf_name =
+ (cfh) ? cfh->GetName()
+ : "NOT FOUND; CF_ID: " + std::to_string(txn.m_cf_id);
+
+ txn_data.waiting_key =
+ rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length());
+
+ txn_data.exclusive_lock = txn.m_exclusive;
+
return txn_data;
}
- static std::string
- get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) {
- std::string path_data;
- if (path_entry.limit_exceeded) {
- path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
- } else {
- path_data += "\n*** DEADLOCK PATH\n"
- "=========================================\n";
- for (auto it = path_entry.path.begin(); it != path_entry.path.end();
- it++) {
- auto txn = *it;
- const GL_INDEX_ID gl_index_id = {
- txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
- txn.m_waiting_key.c_str()))};
- path_data += get_dlock_txn_info(txn, gl_index_id);
- }
+ static Rdb_deadlock_info
+ get_dl_path_trx_info(const rocksdb::DeadlockPath &path_entry) {
+ Rdb_deadlock_info deadlock_info;
- DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
- /* print the first txn in the path to display the full deadlock cycle */
- if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
- auto txn = path_entry.path[0];
- const GL_INDEX_ID gl_index_id = {
- txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
- txn.m_waiting_key.c_str()))};
- path_data += get_dlock_txn_info(txn, gl_index_id, true);
-
- /* prints the txn id of the transaction that caused the deadlock */
- auto deadlocking_txn = *(path_entry.path.end() - 1);
- path_data +=
- format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n",
- deadlocking_txn.m_txn_id);
- }
+ for (auto it = path_entry.path.begin(); it != path_entry.path.end();
+ it++) {
+ auto txn = *it;
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ deadlock_info.path.push_back(get_dl_txn_info(txn, gl_index_id));
}
-
- return path_data;
+ DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
+ /* record the last txn in the path as the victim, i.e. the one that got the deadlock */
+ if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
+ auto deadlocking_txn = *(path_entry.path.end() - 1);
+ deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id;
+ }
+ return deadlock_info;
}
public:
@@ -3419,8 +3620,47 @@ private:
m_data += "----------LATEST DETECTED DEADLOCKS----------\n";
for (auto path_entry : dlock_buffer) {
- m_data += get_dlock_path_info(path_entry);
+ std::string path_data;
+ if (path_entry.limit_exceeded) {
+ path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
+ } else {
+ path_data += "\n*** DEADLOCK PATH\n"
+ "=========================================\n";
+ const auto dl_info = get_dl_path_trx_info(path_entry);
+ for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) {
+ const auto trx_info = *it;
+ path_data += format_string(
+ "TRANSACTION ID: %u\n"
+ "COLUMN FAMILY NAME: %s\n"
+ "WAITING KEY: %s\n"
+ "LOCK TYPE: %s\n"
+ "INDEX NAME: %s\n"
+ "TABLE NAME: %s\n",
+ trx_info.trx_id, trx_info.cf_name.c_str(),
+ trx_info.waiting_key.c_str(),
+ trx_info.exclusive_lock ? "EXCLUSIVE" : "SHARED",
+ trx_info.index_name.c_str(), trx_info.table_name.c_str());
+ if (it != dl_info.path.end() - 1) {
+ path_data += "---------------WAITING FOR---------------\n";
+ }
+ }
+ path_data +=
+ format_string("\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n",
+ dl_info.victim_trx_id);
+ }
+ m_data += path_data;
+ }
+ }
+
+ /*
+ Converts the deadlock buffer reported by rdb->GetDeadlockInfoBuffer() into
+ one Rdb_deadlock_info per deadlock path. Paths flagged limit_exceeded are
+ left out.
+ */
+ std::vector<Rdb_deadlock_info> get_deadlock_info() {
+ std::vector<Rdb_deadlock_info> deadlock_info;
+ auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
+ // Iterate by reference: copying each path entry per iteration is pure
+ // overhead here.
+ for (const auto &path_entry : dlock_buffer) {
+ if (!path_entry.limit_exceeded) {
+ deadlock_info.push_back(get_dl_path_trx_info(path_entry));
+ }
}
+ return deadlock_info;
}
};
@@ -3510,6 +3750,17 @@ std::vector<Rdb_trx_info> rdb_get_all_trx_info() {
return trx_info;
}
+
+/*
+  Returns a vector describing the recent deadlocks, for use by
+  information_schema.rocksdb_deadlock.
+*/
+std::vector<Rdb_deadlock_info> rdb_get_deadlock_info() {
+  Rdb_snapshot_status status_walker;
+  Rdb_transaction::walk_tx_list(&status_walker);
+  return status_walker.get_deadlock_info();
+}
+
#ifdef MARIAROCKS_NOT_YET
/* Generate the snapshot status table */
static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd,
@@ -3816,6 +4067,7 @@ static void rocksdb_update_table_stats(
comp_stats_t comp_stats;
uint lock_wait_timeout_stats;
uint deadlock_stats;
+ uint lock_wait_stats;
std::vector<std::string> tablenames;
/*
@@ -3862,6 +4114,9 @@ static void rocksdb_update_table_stats(
io_perf_write.requests = table_handler->m_io_perf_write.requests.load();
lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load();
deadlock_stats = table_handler->m_deadlock_counter.load();
+ lock_wait_stats =
+ table_handler->m_table_perf_context.m_value[PC_KEY_LOCK_WAIT_COUNT]
+ .load();
/*
Convert from rocksdb timer to mysql timer. RocksDB values are
@@ -3889,7 +4144,7 @@ static void rocksdb_update_table_stats(
sizeof(tablename_sys));
(*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read,
&io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats,
- &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats,
+ &comp_stats, lock_wait_stats, lock_wait_timeout_stats, deadlock_stats,
rocksdb_hton_name);
}
}
@@ -3901,8 +4156,9 @@ static rocksdb::Status check_rocksdb_options_compatibility(
rocksdb::DBOptions loaded_db_opt;
std::vector<rocksdb::ColumnFamilyDescriptor> loaded_cf_descs;
- rocksdb::Status status = LoadLatestOptions(dbpath, rocksdb::Env::Default(),
- &loaded_db_opt, &loaded_cf_descs);
+ rocksdb::Status status =
+ LoadLatestOptions(dbpath, rocksdb::Env::Default(), &loaded_db_opt,
+ &loaded_cf_descs, rocksdb_ignore_unknown_options);
// If we're starting from scratch and there are no options saved yet then this
// is a valid case. Therefore we can't compare the current set of options to
@@ -3941,7 +4197,8 @@ static rocksdb::Status check_rocksdb_options_compatibility(
// This is the essence of the function - determine if it's safe to open the
// database or not.
status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), main_opts,
- loaded_cf_descs);
+ loaded_cf_descs,
+ rocksdb_ignore_unknown_options);
return status;
}
@@ -3954,6 +4211,22 @@ static rocksdb::Status check_rocksdb_options_compatibility(
static int rocksdb_init_func(void *const p) {
DBUG_ENTER_FUNC();
+ if (rdb_check_rocksdb_corruption()) {
+ sql_print_error("RocksDB: There was a corruption detected in RockDB files. "
+ "Check error log emitted earlier for more details.");
+ if (rocksdb_allow_to_start_after_corruption) {
+ sql_print_information(
+ "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent "
+ "server operating if RocksDB corruption is detected.");
+ } else {
+ sql_print_error("RocksDB: The server will exit normally and stop restart "
+ "attempts. Remove %s file from data directory and "
+ "start mysqld manually.",
+ rdb_corruption_marker_file_name().c_str());
+ exit(0);
+ }
+ }
+
// Validate the assumption about the size of ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN.
static_assert(sizeof(longlong) == 8, "Assuming that longlong is 8 bytes.");
@@ -4030,6 +4303,16 @@ static int rocksdb_init_func(void *const p) {
rocksdb_hton->tablefile_extensions= ha_rocksdb_exts;
DBUG_ASSERT(!mysqld_embedded);
+ if (rocksdb_db_options->max_open_files > (long)open_files_limit) {
+ sql_print_information("RocksDB: rocksdb_max_open_files should not be "
+ "greater than the open_files_limit, effective value "
+ "of rocksdb_max_open_files is being set to "
+ "open_files_limit / 2.");
+ rocksdb_db_options->max_open_files = open_files_limit / 2;
+ } else if (rocksdb_db_options->max_open_files == -2) {
+ rocksdb_db_options->max_open_files = open_files_limit / 2;
+ }
+
rocksdb_stats = rocksdb::CreateDBStatistics();
rocksdb_db_options->statistics = rocksdb_stats;
@@ -4078,14 +4361,20 @@ static int rocksdb_init_func(void *const p) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
+ if (rocksdb_db_options->allow_mmap_writes &&
+ rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 "
+ "to use allow_mmap_writes");
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
// sst_file_manager will move deleted rocksdb sst files to trash_dir
// to be deleted in a background thread.
std::string trash_dir = std::string(rocksdb_datadir) + "/trash";
- rocksdb_db_options->sst_file_manager.reset(
- NewSstFileManager(rocksdb_db_options->env, myrocks_logger, trash_dir));
-
- rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond(
- rocksdb_sst_mgr_rate_bytes_per_sec);
+ rocksdb_db_options->sst_file_manager.reset(NewSstFileManager(
+ rocksdb_db_options->env, myrocks_logger, trash_dir,
+ rocksdb_sst_mgr_rate_bytes_per_sec, true /* delete_existing_trash */));
std::vector<std::string> cf_names;
rocksdb::Status status;
@@ -4158,9 +4447,15 @@ static int rocksdb_init_func(void *const p) {
if (rocksdb_persistent_cache_size_mb > 0) {
std::shared_ptr<rocksdb::PersistentCache> pcache;
uint64_t cache_size_bytes= rocksdb_persistent_cache_size_mb * 1024 * 1024;
- rocksdb::NewPersistentCache(
+ status = rocksdb::NewPersistentCache(
rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path),
cache_size_bytes, myrocks_logger, true, &pcache);
+ if (!status.ok()) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Persistent cache returned error: (%s)",
+ status.getState());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
rocksdb_tbl_options->persistent_cache = pcache;
} else if (strlen(rocksdb_persistent_cache_path)) {
sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb");
@@ -4479,6 +4774,33 @@ static inline void rocksdb_smart_next(bool seek_backward,
}
}
+#ifndef NDEBUG
+// Debug-build helper: overwrites *status with a Corruption status to simulate
+// that RocksDB has reported corrupted data.
+static void dbug_change_status_to_corrupted(rocksdb::Status *status) {
+ *status = rocksdb::Status::Corruption();
+}
+#endif
+
+// Reports whether the iterator is positioned on a valid entry.
+//
+// An invalid iterator does not necessarily mean EOF: it can also be caused by
+// an IOError or by corruption, so the iterator status is always inspected.
+// https://github.com/facebook/rocksdb/wiki/Iterator#error-handling
+static inline bool is_valid(rocksdb::Iterator *scan_it) {
+  if (scan_it->Valid()) {
+    return true;
+  }
+
+  rocksdb::Status iter_status = scan_it->status();
+  DBUG_EXECUTE_IF("rocksdb_return_status_corrupted",
+                  dbug_change_status_to_corrupted(&iter_status););
+
+  // Corruption leaves a marker behind before the error is reported.
+  const bool corrupted = iter_status.IsCorruption();
+  if (corrupted) {
+    rdb_persist_corruption_marker();
+  }
+  if (corrupted || iter_status.IsIOError()) {
+    rdb_handle_io_error(iter_status, RDB_IO_ERROR_GENERAL);
+  }
+  return false;
+}
+
/**
@brief
Example of simple lock controls. The "table_handler" it creates is a
@@ -4555,28 +4877,124 @@ std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const {
return names;
}
+/*
+  Inspired by innobase_get_int_col_max_value from InnoDB. This returns the
+  maximum value a type can take on.
+
+  @param field  column whose key type selects the limit
+  @return the largest value for that key type. For FLOAT and DOUBLE this is
+          2^24 and 2^53 respectively, the limits the InnoDB helper uses
+          (beyond them consecutive integers are no longer all representable).
+
+  Any key type not listed is treated as a programming error and aborts.
+*/
+static ulonglong rdb_get_int_col_max_value(const Field *field) {
+  ulonglong max_value = 0;
+  switch (field->key_type()) {
+  case HA_KEYTYPE_BINARY:
+    max_value = 0xFFULL;
+    break;
+  case HA_KEYTYPE_INT8:
+    max_value = 0x7FULL;
+    break;
+  case HA_KEYTYPE_USHORT_INT:
+    max_value = 0xFFFFULL;
+    break;
+  case HA_KEYTYPE_SHORT_INT:
+    max_value = 0x7FFFULL;
+    break;
+  case HA_KEYTYPE_UINT24:
+    max_value = 0xFFFFFFULL;
+    break;
+  case HA_KEYTYPE_INT24:
+    max_value = 0x7FFFFFULL;
+    break;
+  case HA_KEYTYPE_ULONG_INT:
+    max_value = 0xFFFFFFFFULL;
+    break;
+  case HA_KEYTYPE_LONG_INT:
+    max_value = 0x7FFFFFFFULL;
+    break;
+  case HA_KEYTYPE_ULONGLONG:
+    max_value = 0xFFFFFFFFFFFFFFFFULL;
+    break;
+  case HA_KEYTYPE_LONGLONG:
+    max_value = 0x7FFFFFFFFFFFFFFFULL;
+    break;
+  // Floating point columns may carry AUTO_INCREMENT as well; without these
+  // cases such a column would reach abort() below.
+  case HA_KEYTYPE_FLOAT:
+    max_value = 0x1000000ULL;
+    break;
+  case HA_KEYTYPE_DOUBLE:
+    max_value = 0x20000000000000ULL;
+    break;
+  default:
+    abort();
+  }
+
+  return max_value;
+}
+
void ha_rocksdb::load_auto_incr_value() {
+  // Seeds the in-memory auto-increment counter of m_tbl_def, first from the
+  // data dictionary and then, if needed, from the last entry of the index.
+  ulonglong auto_incr = 0;
+  bool use_datadic = true;
+  bool validate_last = false;
+#ifndef NDEBUG
+  DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;);
+  validate_last = true;
+#endif
+
+  if (use_datadic) {
+    const bool found_in_dict = dict_manager.get_auto_incr_val(
+        m_tbl_def->get_autoincr_gl_index_id(), &auto_incr);
+    if (found_in_dict) {
+      update_auto_incr_val(auto_incr);
+    }
+  }
+
+  // If we find nothing in the data dictionary, or if we are in debug mode,
+  // then call index_last to get the last value.
+  //
+  // This is needed when upgrading from a server that did not support
+  // persistent auto_increment, or if the table is empty.
+  //
+  // For debug mode, we are just verifying that the data dictionary value is
+  // greater than or equal to the maximum value in the table.
+  const bool need_index_lookup = (auto_incr == 0) || validate_last;
+  if (need_index_lookup) {
+    auto_incr = load_auto_incr_value_from_index();
+    update_auto_incr_val(auto_incr);
+  }
+
+  // Neither the data dictionary nor the index produced a value: start the
+  // counter at 1.
+  if (m_tbl_def->m_auto_incr_val == 0) {
+    update_auto_incr_val(1);
+  }
+}
+
+ulonglong ha_rocksdb::load_auto_incr_value_from_index() {
const int save_active_index = active_index;
active_index = table->s->next_number_index;
const uint8 save_table_status = table->status;
+ ulonglong last_val = 0;
- /*
- load_auto_incr_value() may be called by statements that
- do not execute implicit commits (i.e. SHOW CREATE TABLE).
- index_last() creates a snapshot. When a snapshot is created
- here, it has to be released as well. (GitHub issue#189)
- */
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
const bool is_new_snapshot = !tx->has_snapshot();
+ if (is_new_snapshot) {
+ tx->acquire_snapshot(true);
+ }
// Do a lookup. We only need index column, so it should be index-only.
- // (another reason to make it index-only is that table->read_set is
- // not set appropriately and non-index-only lookup will not read the value)
+ // (another reason to make it index-only is that table->read_set is not set
+ // appropriately and non-index-only lookup will not read the value)
const bool save_keyread_only = m_keyread_only;
m_keyread_only = true;
+ m_key_requested = true;
- if (!index_last(table->record[0]))
- update_auto_incr_val();
+ if (!index_last(table->record[0])) {
+ Field *field =
+ table->key_info[table->s->next_number_index].key_part[0].field;
+ ulonglong max_val = rdb_get_int_col_max_value(field);
+ my_bitmap_map *const old_map =
+ dbug_tmp_use_all_columns(table, table->read_set);
+ last_val = field->val_int();
+ if (last_val != max_val) {
+ last_val++;
+ }
+#ifndef NDEBUG
+ ulonglong dd_val;
+ if (last_val <= max_val) {
+ const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id();
+ if (dict_manager.get_auto_incr_val(gl_index_id, &dd_val) &&
+ tx->get_auto_incr(gl_index_id) == 0) {
+ DBUG_ASSERT(dd_val >= last_val);
+ }
+ }
+#endif
+ dbug_tmp_restore_column_map(table->read_set, old_map);
+ }
m_keyread_only = save_keyread_only;
if (is_new_snapshot) {
@@ -4589,38 +5007,46 @@ void ha_rocksdb::load_auto_incr_value() {
/*
Do what ha_rocksdb::index_end() does.
(Why don't we use index_init/index_end? class handler defines index_init
- as private, for some reason).
- */
+ as private, for some reason).
+ */
release_scan_iterator();
+
+ return last_val;
}
-/* Get PK value from table->record[0]. */
-/*
- TODO(alexyang): No existing support for auto_increment on non-pk columns, see
- end of ha_rocksdb::create. Also see opened issue here:
- https://github.com/facebook/mysql-5.6/issues/153
-*/
-void ha_rocksdb::update_auto_incr_val() {
+// Raises m_tbl_def->m_auto_incr_val to val unless the stored value is already
+// at least val; the stored value is never lowered.
+void ha_rocksdb::update_auto_incr_val(ulonglong val) {
+  ulonglong observed = m_tbl_def->m_auto_incr_val;
+  // A failed compare_exchange_weak refreshes `observed`, so every retry is
+  // checked against the latest stored value.
+  while (observed < val) {
+    if (m_tbl_def->m_auto_incr_val.compare_exchange_weak(observed, val)) {
+      break;
+    }
+  }
+}
+
+// Reads the auto-increment column of the current row (first key part of
+// table->s->next_number_index), computes the next value and records it both
+// in the transaction and in the in-memory counter of m_tbl_def.
+void ha_rocksdb::update_auto_incr_val_from_field() {
Field *field;
- longlong new_val;
+ ulonglong new_val, max_val;
field = table->key_info[table->s->next_number_index].key_part[0].field;
+ max_val = rdb_get_int_col_max_value(field);
my_bitmap_map *const old_map =
dbug_tmp_use_all_columns(table, table->read_set);
new_val = field->val_int();
// don't increment if we would wrap around
- if (new_val != std::numeric_limits<longlong>::max()) {
+ if (new_val != max_val) {
new_val++;
}
dbug_tmp_restore_column_map(table->read_set, old_map);
- longlong auto_incr_val = m_tbl_def->m_auto_incr_val;
- while (auto_incr_val < new_val &&
- !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val,
- new_val)) {
- // Do nothing - just loop until auto_incr_val is >= new_val or
- // we successfully set it
+ // Only update if positive value was set for auto_incr column.
+ // NOTE(review): new_val is unsigned, so a negative column value presumably
+ // shows up here as a number above max_val and is skipped - confirm.
+ if (new_val <= max_val) {
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ tx->set_auto_incr(m_tbl_def->get_autoincr_gl_index_id(), new_val);
+
+ // Update the in memory auto_incr value in m_tbl_def.
+ update_auto_incr_val(new_val);
}
}
@@ -4632,12 +5058,12 @@ int ha_rocksdb::load_hidden_pk_value() {
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
const bool is_new_snapshot = !tx->has_snapshot();
+ longlong hidden_pk_id = 1;
// Do a lookup.
if (!index_last(table->record[0])) {
/*
Decode PK field from the key
*/
- longlong hidden_pk_id = 0;
auto err = read_hidden_pk_id_from_rowkey(&hidden_pk_id);
if (err) {
if (is_new_snapshot) {
@@ -4647,11 +5073,11 @@ int ha_rocksdb::load_hidden_pk_value() {
}
hidden_pk_id++;
- longlong old = m_tbl_def->m_hidden_pk_val;
- while (
- old < hidden_pk_id &&
- !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) {
- }
+ }
+
+ longlong old = m_tbl_def->m_hidden_pk_val;
+ while (old < hidden_pk_id &&
+ !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) {
}
if (is_new_snapshot) {
@@ -4737,18 +5163,15 @@ ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton,
m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr),
m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr),
m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr),
- m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr),
- m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE),
- m_bulk_load_tx(nullptr), m_encoder_arr(nullptr),
+ m_dup_sk_packed_tuple_old(nullptr), m_eq_cond_lower_bound(nullptr),
+ m_eq_cond_upper_bound(nullptr), m_pack_buffer(nullptr),
+ m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), m_encoder_arr(nullptr),
m_row_checksums_checked(0), m_in_rpl_delete_rows(false),
- m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {
- // TODO(alexyang): create a valid PSI_mutex_key for this mutex
- mysql_mutex_init(0, &m_bulk_load_mutex, MY_MUTEX_INIT_FAST);
-}
+ m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {}
-bool ha_rocksdb::same_table(const ha_rocksdb &other) const {
- return m_tbl_def->base_tablename() == other.m_tbl_def->base_tablename();
+// Returns the base table name held by this handler's table definition
+// (m_tbl_def->base_tablename()).
+const std::string &ha_rocksdb::get_table_basename() const {
+ return m_tbl_def->base_tablename();
}
/**
@@ -4834,8 +5257,12 @@ bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd,
#ifndef NDEBUG
read_filter_ts += rdb_dbug_set_ttl_read_filter_ts();
#endif
- return ts + kd.m_ttl_duration + read_filter_ts <=
- static_cast<uint64>(curr_ts);
+ bool is_hide_ttl =
+ ts + kd.m_ttl_duration + read_filter_ts <= static_cast<uint64>(curr_ts);
+ if (is_hide_ttl) {
+ update_row_stats(ROWS_FILTERED);
+ }
+ return is_hide_ttl;
}
void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
@@ -5049,12 +5476,12 @@ int ha_rocksdb::convert_record_to_storage_format(
Setup which fields will be unpacked when reading rows
@detail
- Two special cases when we still unpack all fields:
+ Three special cases when we still unpack all fields:
- When this table is being updated (m_lock_rows==RDB_LOCK_WRITE).
- When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to
- read all
- fields to find whether there is a row checksum at the end. We could skip
- the fields instead of decoding them, but currently we do decoding.)
+ read all fields to find whether there is a row checksum at the end. We could
+ skip the fields instead of decoding them, but currently we do decoding.)
+ - On index merge as bitmap is cleared during that operation
@seealso
ha_rocksdb::setup_field_converters()
@@ -5062,20 +5489,29 @@ int ha_rocksdb::convert_record_to_storage_format(
*/
void ha_rocksdb::setup_read_decoders() {
m_decoders_vect.clear();
+ m_key_requested = false;
int last_useful = 0;
int skip_size = 0;
for (uint i = 0; i < table->s->fields; i++) {
+ // bitmap is cleared on index merge, but it still needs to decode columns
+ const bool field_requested =
+ m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums ||
+ bitmap_is_clear_all(table->read_set) ||
+ bitmap_is_set(table->read_set, table->field[i]->field_index);
+
// We only need the decoder if the whole record is stored.
if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) {
+ // the field potentially needs unpacking
+ if (field_requested) {
+ // the field is in the read set
+ m_key_requested = true;
+ }
continue;
}
- // bitmap is cleared on index merge, but it still needs to decode columns
- if (m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums ||
- bitmap_is_clear_all(table->read_set) ||
- bitmap_is_set(table->read_set, table->field[i]->field_index)) {
+ if (field_requested) {
// We will need to decode this field
m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size});
last_useful = m_decoders_vect.size();
@@ -5101,13 +5537,18 @@ void ha_rocksdb::setup_read_decoders() {
}
#ifndef NDEBUG
-void dbug_append_garbage_at_end(std::string &on_disk_rec) {
- on_disk_rec.append("abc");
+// Debug helper: replaces the contents of *on_disk_rec with the same bytes
+// followed by "abc". The data is copied into a std::string first because the
+// slice is Reset() before the extended copy is pinned back into it.
+void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) {
+ std::string str(on_disk_rec->data(), on_disk_rec->size());
+ on_disk_rec->Reset();
+ str.append("abc");
+ on_disk_rec->PinSelf(rocksdb::Slice(str));
}
-void dbug_truncate_record(std::string &on_disk_rec) { on_disk_rec.resize(0); }
+// Debug helper: drops a suffix as long as the whole record, leaving
+// *on_disk_rec empty.
+void dbug_truncate_record(rocksdb::PinnableSlice *on_disk_rec) {
+ on_disk_rec->remove_suffix(on_disk_rec->size());
+}
-void dbug_modify_rec_varchar12(std::string &on_disk_rec) {
+void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) {
std::string res;
// The record is NULL-byte followed by VARCHAR(10).
// Put the NULL-byte
@@ -5116,7 +5557,8 @@ void dbug_modify_rec_varchar12(std::string &on_disk_rec) {
res.append("\xC", 1);
res.append("123456789ab", 12);
- on_disk_rec.assign(res);
+ on_disk_rec->Reset();
+ on_disk_rec->PinSelf(rocksdb::Slice(res));
}
void dbug_modify_key_varchar8(String &on_disk_rec) {
@@ -5138,16 +5580,15 @@ void dbug_create_err_inplace_alter() {
int ha_rocksdb::convert_record_from_storage_format(
const rocksdb::Slice *const key, uchar *const buf) {
+
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1",
- dbug_append_garbage_at_end(m_retrieved_record););
+ dbug_append_garbage_at_end(&m_retrieved_record););
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2",
- dbug_truncate_record(m_retrieved_record););
+ dbug_truncate_record(&m_retrieved_record););
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read3",
- dbug_modify_rec_varchar12(m_retrieved_record););
+ dbug_modify_rec_varchar12(&m_retrieved_record););
- const rocksdb::Slice retrieved_rec_slice(&m_retrieved_record.front(),
- m_retrieved_record.size());
- return convert_record_from_storage_format(key, &retrieved_rec_slice, buf);
+ return convert_record_from_storage_format(key, &m_retrieved_record, buf);
}
int ha_rocksdb::convert_blob_from_storage_format(
@@ -5310,9 +5751,13 @@ int ha_rocksdb::convert_record_from_storage_format(
Rdb_key_def::get_unpack_header_size(unpack_info[0]));
}
- int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
- unpack_info ? &unpack_slice : nullptr,
- false /* verify_checksum */);
+ int err = HA_EXIT_SUCCESS;
+ if (m_key_requested) {
+ err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
+ unpack_info ? &unpack_slice : nullptr,
+ false /* verify_checksum */);
+ }
+
if (err != HA_EXIT_SUCCESS) {
return err;
}
@@ -5556,6 +6001,11 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
m_pack_buffer =
reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_eq_cond_upper_bound =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_eq_cond_lower_bound =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+
/*
If inplace alter is happening, allocate special buffers for unique
secondary index duplicate checking.
@@ -5570,6 +6020,7 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
if (m_pk_tuple == nullptr || m_pk_packed_tuple == nullptr ||
m_sk_packed_tuple == nullptr || m_sk_packed_tuple_old == nullptr ||
m_end_key_packed_tuple == nullptr || m_pack_buffer == nullptr ||
+ m_eq_cond_upper_bound == nullptr || m_eq_cond_lower_bound == nullptr ||
(alloc_alter_buffers && (m_dup_sk_packed_tuple == nullptr ||
m_dup_sk_packed_tuple_old == nullptr))) {
// One or more of the above allocations failed. Clean up and exit
@@ -5608,6 +6059,12 @@ void ha_rocksdb::free_key_buffers() {
my_free(m_dup_sk_packed_tuple_old);
m_dup_sk_packed_tuple_old = nullptr;
+
+ my_free(m_eq_cond_upper_bound);
+ m_eq_cond_upper_bound = nullptr;
+
+ my_free(m_eq_cond_lower_bound);
+ m_eq_cond_lower_bound = nullptr;
}
#ifdef MARIAROCKS_NOT_YET
@@ -5763,11 +6220,13 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
*/
m_verify_row_debug_checksums = false;
- /* TODO: move the following to where TABLE_SHARE is opened: */
- if (table->found_next_number_field)
+ /* Load auto_increment value only once on first use. */
+ if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) {
load_auto_incr_value();
+ }
- if (has_hidden_pk(table) &&
+ /* Load hidden pk only once on first use. */
+ if (has_hidden_pk(table) && m_tbl_def->m_hidden_pk_val == 0 &&
(err = load_hidden_pk_value()) != HA_EXIT_SUCCESS) {
free_key_buffers();
DBUG_RETURN(err);
@@ -6740,6 +7199,20 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)];
+ if (create_info->auto_increment_value) {
+ bool autoinc_upgrade_test = false;
+ m_tbl_def->m_auto_incr_val = create_info->auto_increment_value;
+ DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;);
+ if (!autoinc_upgrade_test) {
+ auto s = dict_manager.put_auto_incr_val(
+ batch, m_tbl_def->get_autoincr_gl_index_id(),
+ m_tbl_def->m_auto_incr_val);
+ if (!s.ok()) {
+ goto error;
+ }
+ }
+ }
+
dict_manager.lock();
err = ddl_manager.put_and_write(m_tbl_def, batch);
if (err != HA_EXIT_SUCCESS) {
@@ -6755,23 +7228,6 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
dict_manager.unlock();
- if (create_info->auto_increment_value)
- m_tbl_def->m_auto_incr_val = create_info->auto_increment_value;
-
- /*
- We only support auto_increment at start of the PRIMARY KEY.
- */
- // Field *field;
- // if ((field= table_arg->next_number_field))
- /* TODO mdcallag: disable this for now to let UNIQUE indexes kind of work
- if ((field= table_arg->found_next_number_field))
- {
- int pk= table_arg->s->primary_key;
- Field *pk_field= table_arg->key_info[pk].key_part[0].field;
- if (field->field_index != pk_field->field_index)
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
- */
DBUG_RETURN(HA_EXIT_SUCCESS);
error:
@@ -6869,7 +7325,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
*/
rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice);
- while (m_scan_it->Valid()) {
+ while (is_valid(m_scan_it)) {
/*
We are using full key and we've hit an exact match, or...
@@ -6909,12 +7365,12 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
from the POV of the current transaction. If it has, try going to the next
key.
*/
- while (m_scan_it->Valid() && kd.has_ttl() &&
+ while (is_valid(m_scan_it) && kd.has_ttl() &&
should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) {
rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it);
}
- return m_scan_it->Valid() ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
+ return is_valid(m_scan_it) ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
}
int ha_rocksdb::position_to_correct_key(
@@ -7074,7 +7530,7 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
bool covered_lookup =
m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap);
- if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
pk_size =
kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
if (pk_size == RDB_INVALID_KEY_LEN) {
@@ -7175,7 +7631,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
/* Use STATUS_NOT_FOUND when record not found or some error occurred */
table->status = STATUS_NOT_FOUND;
- if (m_scan_it->Valid()) {
+ if (is_valid(m_scan_it)) {
rocksdb::Slice key = m_scan_it->key();
/* Check if we've ran out of records of this index */
@@ -7196,8 +7652,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
bool covered_lookup =
m_keyread_only || m_key_descr_arr[keyno]->covers_lookup(
table, &value, &m_lookup_bitmap);
- if (covered_lookup && m_lock_rows == RDB_LOCK_NONE &&
- !has_hidden_pk(table)) {
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
rc = m_key_descr_arr[keyno]->unpack_record(
table, buf, &key, &value, m_verify_row_debug_checksums);
global_stats.covered_secondary_key_lookups.inc();
@@ -7525,7 +7980,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
while (1) {
rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
- if (!m_scan_it->Valid()) {
+ if (!is_valid(m_scan_it)) {
table->status = STATUS_NOT_FOUND;
return HA_ERR_END_OF_FILE;
}
@@ -7789,7 +8244,7 @@ void dbug_dump_database(rocksdb::DB *const db) {
rocksdb::Status ha_rocksdb::get_for_update(
Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value) const {
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const {
DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE);
const bool exclusive = m_lock_rows != RDB_LOCK_READ;
@@ -7875,6 +8330,9 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
&m_retrieved_record);
}
+ DBUG_EXECUTE_IF("rocksdb_return_status_corrupted",
+ dbug_change_status_to_corrupted(&s););
+
if (!s.IsNotFound() && !s.ok()) {
DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
m_table_handler));
@@ -7885,9 +8343,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
if (found) {
/* If we found the record, but it's expired, pretend we didn't find it. */
if (!skip_ttl_check && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(*m_pk_descr,
- rocksdb::Slice(&m_retrieved_record.front(),
- m_retrieved_record.size()),
+ should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
tx->m_snapshot_timestamp)) {
DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
}
@@ -8060,11 +8516,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
DBUG_ASSERT(key != nullptr);
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
- if (kd.m_is_reverse_cf) {
- kd.get_supremum_key(key, &key_size);
- } else {
- kd.get_infimum_key(key, &key_size);
- }
+ kd.get_first_key(key, &key_size);
rocksdb::Slice index_key((const char *)key, key_size);
@@ -8652,23 +9104,21 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id,
The bloom filter may need to be disabled for this lookup.
*/
- const bool total_order_seek = !can_use_bloom_filter(
+ const bool total_order_seek = !check_bloom_and_set_bounds(
ha_thd(), kd, new_slice, all_parts_used);
const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache);
- /*
- psergey-todo: we just need to take lock, lookups not needed:
- */
- std::string dummy_value;
const rocksdb::Status s =
- get_for_update(row_info.tx, kd.get_cf(), new_slice, &dummy_value);
+ get_for_update(row_info.tx, kd.get_cf(), new_slice, nullptr);
if (!s.ok() && !s.IsNotFound()) {
return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
m_table_handler);
}
rocksdb::Iterator *const iter = row_info.tx->get_iterator(
- kd.get_cf(), total_order_seek, fill_cache, true /* read current data */,
+ kd.get_cf(), total_order_seek, fill_cache,
+ m_eq_cond_lower_bound_slice, m_eq_cond_upper_bound_slice,
+ true /* read current data */,
false /* acquire snapshot */);
/*
Need to scan the transaction to see if there is a duplicate key.
@@ -8711,9 +9161,7 @@ int ha_rocksdb::check_uniqueness_and_lock(
m_retrieved_record by check_and_lock_unique_pk().
*/
if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(*m_pk_descr,
- rocksdb::Slice(&m_retrieved_record.front(),
- m_retrieved_record.size()),
+ should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
(row_info.tx->m_snapshot_timestamp
? row_info.tx->m_snapshot_timestamp
: static_cast<int64_t>(std::time(nullptr))))) {
@@ -8762,109 +9210,48 @@ int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
const rocksdb::Slice &key,
const rocksdb::Slice &value, bool sort) {
DBUG_ENTER_FUNC();
-
+ int res;
rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
- DBUG_ASSERT(cf != nullptr);
- int res = HA_EXIT_SUCCESS;
+ // In the case of unsorted inserts, m_sst_info allocated here is not
+ // used to store the keys. It is still used to indicate when tables
+ // are switched.
+ if (m_sst_info == nullptr || m_sst_info->is_committed()) {
+ m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
+ kd.get_name(), cf, *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api)));
+ res = tx->start_bulk_load(this, m_sst_info);
+ if (res != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(res);
+ }
+ }
+ DBUG_ASSERT(m_sst_info);
if (sort) {
- GL_INDEX_ID kd_gl_id = kd.get_gl_index_id();
- auto it = m_key_merge.find(kd_gl_id);
- if (it == m_key_merge.end()) {
- m_key_merge.emplace(
- std::piecewise_construct, std::make_tuple(kd_gl_id),
- std::make_tuple(
- thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size),
- THDVAR(ha_thd(), merge_combine_read_size),
- THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf));
- it = m_key_merge.find(kd_gl_id);
- if ((res = it->second.init()) != 0) {
- DBUG_RETURN(res);
- }
+ Rdb_index_merge *key_merge;
+ DBUG_ASSERT(cf != nullptr);
- if (m_bulk_load_tx == nullptr) {
- tx->start_bulk_load(this);
- m_bulk_load_tx = tx;
- }
+ res = tx->get_key_merge(kd.get_gl_index_id(), cf, &key_merge);
+ if (res == HA_EXIT_SUCCESS) {
+ res = key_merge->add(key, value);
}
- res = it->second.add(key, value);
} else {
- if (!m_sst_info) {
- m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
- kd.get_name(), cf, *rocksdb_db_options,
- THDVAR(ha_thd(), trace_sst_api)));
- tx->start_bulk_load(this);
- m_bulk_load_tx = tx;
- }
-
- DBUG_ASSERT(m_sst_info);
-
res = m_sst_info->put(key, value);
}
DBUG_RETURN(res);
}
-int ha_rocksdb::finalize_bulk_load() {
+int ha_rocksdb::finalize_bulk_load(bool print_client_error) {
DBUG_ENTER_FUNC();
- DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info,
- m_bulk_load_tx != nullptr);
-
- /* Skip if there are no possible ongoing bulk loads */
- if (m_key_merge.empty() && !m_sst_info && m_bulk_load_tx == nullptr) {
- DBUG_RETURN(HA_EXIT_SUCCESS);
- }
-
int res = HA_EXIT_SUCCESS;
- RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex);
-
+ /* Skip if there are no possible ongoing bulk loads */
if (m_sst_info) {
- res = m_sst_info->commit();
+ res = m_sst_info->commit(print_client_error);
m_sst_info.reset();
}
-
- if (!m_key_merge.empty()) {
- rocksdb::Slice merge_key;
- rocksdb::Slice merge_val;
- for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
- const std::string &index_name =
- ddl_manager.safe_find(it->first)->get_name();
- Rdb_index_merge &rdb_merge = it->second;
- Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name,
- rdb_merge.get_cf(), *rocksdb_db_options,
- THDVAR(ha_thd(), trace_sst_api));
-
- while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) {
- if ((res = sst_info.put(merge_key, merge_val)) != 0) {
- break;
- }
- }
- // res == -1 => finished ok; res > 0 => error
- if (res <= 0) {
- if ((res = sst_info.commit()) != 0) {
- break;
- }
- }
- }
- m_key_merge.clear();
-
- /*
- Explicitly tell jemalloc to clean up any unused dirty pages at this point.
- See https://reviews.facebook.net/D63723 for more details.
- */
- purge_all_jemalloc_arenas();
- }
-
- if (m_bulk_load_tx != nullptr) {
- m_bulk_load_tx->end_bulk_load(this);
- m_bulk_load_tx = nullptr;
- }
-
- RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex);
-
DBUG_RETURN(res);
}
@@ -8894,7 +9281,7 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
}
if (table->next_number_field) {
- update_auto_incr_val();
+ update_auto_incr_val_from_field();
}
int rc = HA_EXIT_SUCCESS;
@@ -9126,6 +9513,42 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
}
/*
+ Set iterator upper/lower bounds for Seek/SeekForPrev.
+ This lets RocksDB avoid scanning tombstones outside of
+ the given key range when prefix_same_as_start=true was not passed
+ (i.e. when the prefix bloom filter cannot be used).
+ Swapping the upper/lower bounds is necessary on a reverse-order CF.
+ This covers the HA_READ_PREFIX_LAST* case as well. For example,
+ if the query's eq condition was 12 bytes long with the value
+ 0x0000b3eb003f65c5e78858b8, and we are doing HA_READ_PREFIX_LAST,
+ eq_cond_len is 11 (see calc_eq_cond_len() for details).
+ If the index is in reverse order, the upper bound would be
+ 0x0000b3eb003f65c5e78857, and the lower bound would be
+ 0x0000b3eb003f65c5e78859. These cover the given eq condition range.
+*/
+void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond) {
+ uint eq_cond_len = eq_cond.size();
+ DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE);
+ memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len);
+ kd.successor(m_eq_cond_upper_bound, eq_cond_len);
+ memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len);
+ kd.predecessor(m_eq_cond_lower_bound, eq_cond_len);
+
+ if (kd.m_is_reverse_cf) {
+ m_eq_cond_upper_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len);
+ m_eq_cond_lower_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len);
+ } else {
+ m_eq_cond_upper_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len);
+ m_eq_cond_lower_bound_slice =
+ rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len);
+ }
+}
+
+/*
Open a cursor
*/
@@ -9141,7 +9564,7 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
bool skip_bloom = true;
const rocksdb::Slice eq_cond(slice->data(), eq_cond_len);
- if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys)) {
+ if (check_bloom_and_set_bounds(ha_thd(), kd, eq_cond, use_all_keys)) {
skip_bloom = false;
}
@@ -9181,7 +9604,9 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
read_opts.snapshot = m_scan_it_snapshot;
m_scan_it = rdb->NewIterator(read_opts, kd.get_cf());
} else {
- m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache);
+ m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache,
+ m_eq_cond_lower_bound_slice,
+ m_eq_cond_upper_bound_slice);
}
m_scan_it_skips_bloom = skip_bloom;
}
@@ -9200,14 +9625,12 @@ void ha_rocksdb::release_scan_iterator() {
void ha_rocksdb::setup_iterator_for_rnd_scan() {
uint key_size;
- if (m_pk_descr->m_is_reverse_cf)
- m_pk_descr->get_supremum_key(m_pk_packed_tuple, &key_size);
- else
- m_pk_descr->get_infimum_key(m_pk_packed_tuple, &key_size);
+ m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
- setup_scan_iterator(*m_pk_descr, &table_key);
+ setup_scan_iterator(*m_pk_descr, &table_key, false,
+ Rdb_key_def::INDEX_NUMBER_SIZE);
m_scan_it->Seek(table_key);
m_skip_scan_it_next_call = true;
}
@@ -9279,7 +9702,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
#ifdef MARIAROCKS_NOT_YET
stats.rows_requested++;
#endif
- if (!m_scan_it || !m_scan_it->Valid()) {
+ if (!m_scan_it || !is_valid(m_scan_it)) {
/*
We can get here when SQL layer has called
@@ -9301,7 +9724,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
m_scan_it->Prev(); /* this call cannot fail */
}
- if (!m_scan_it->Valid()) {
+ if (!is_valid(m_scan_it)) {
rc = HA_ERR_END_OF_FILE;
break;
}
@@ -9610,6 +10033,10 @@ int ha_rocksdb::info(uint flag) {
uint64_t memtableCount;
uint64_t memtableSize;
+ // the stats below are calculated from the skiplist, which is a probabilistic
+ // data structure, so the results vary between test runs
+ // it can also return 0 for quite large tables, which means that
+ // cardinality for memtable-only indexes will be reported as 0
rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
&memtableCount, &memtableSize);
@@ -9643,7 +10070,6 @@ int ha_rocksdb::info(uint flag) {
if (flag & HA_STATUS_CONST) {
ref_length = m_pk_descr->max_storage_fmt_length();
- // TODO: Needs to reimplement after having real index statistics
for (uint i = 0; i < m_tbl_def->m_key_count; i++) {
if (is_hidden_pk(i, table, m_tbl_def)) {
continue;
@@ -9927,20 +10353,6 @@ void ha_rocksdb::read_thd_vars(THD *const thd) {
m_checksums_pct = THDVAR(thd, checksums_pct);
}
-const char *ha_rocksdb::thd_rocksdb_tmpdir() {
- const char *tmp_dir = THDVAR(ha_thd(), tmpdir);
-
- /*
- We want to treat an empty string as nullptr, in these cases DDL operations
- will use the default --tmpdir passed to mysql instead.
- */
- if (tmp_dir != nullptr && *tmp_dir == '\0') {
- tmp_dir = nullptr;
- }
-
- return (tmp_dir);
-}
-
/**
@return
@@ -10131,6 +10543,13 @@ ha_rocksdb::get_range(const int &i,
return myrocks::get_range(*m_key_descr_arr[i], buf);
}
+/*
+ This function is called with total_order_seek=true, but
+ setting upper/lower bounds is not necessary.
+ Setting bounds is useful when there is no matching key,
+ but in drop_index_thread's case, that means the index is marked as removed,
+ so no further seeks will happen for that index id.
+*/
static bool is_myrocks_index_empty(
rocksdb::ColumnFamilyHandle *cfh, const bool is_reverse_cf,
const rocksdb::ReadOptions &read_opts,
@@ -10201,7 +10620,7 @@ void Rdb_drop_index_thread::run() {
"from cf id %u. MyRocks data dictionary may "
"get corrupted.",
d.cf_id);
- abort_with_stack_traces();
+ abort();
}
rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id);
DBUG_ASSERT(cfh);
@@ -10290,6 +10709,7 @@ int ha_rocksdb::delete_table(const char *const tablename) {
the persistent data dictionary).
*/
ddl_manager.remove(tbl, batch, true);
+
int err = dict_manager.commit(batch);
if (err) {
DBUG_RETURN(err);
@@ -10320,10 +10740,12 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
const Rdb_key_def &kd = *tbl->m_key_descr_arr[i];
kd.get_infimum_key(reinterpret_cast<uchar *>(key_buf), &key_len);
rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
-
+ const rocksdb::Slice table_key(key_buf, key_len);
+ setup_iterator_bounds(kd, table_key);
+ opts.iterate_lower_bound = &m_eq_cond_lower_bound_slice;
+ opts.iterate_upper_bound = &m_eq_cond_upper_bound_slice;
std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(opts, cf));
- const rocksdb::Slice table_key(key_buf, key_len);
it->Seek(table_key);
while (it->Valid()) {
const rocksdb::Slice key = it->key();
@@ -10402,6 +10824,7 @@ int ha_rocksdb::rename_table(const char *const from, const char *const to) {
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
dict_manager.lock();
+
if (ddl_manager.rename(from_str, to_str, batch)) {
rc = HA_ERR_NO_SUCH_TABLE;
} else {
@@ -10452,7 +10875,7 @@ int ha_rocksdb::extra(enum ha_extra_function operation) {
If the table has blobs, then they are part of m_retrieved_record.
This call invalidates them.
*/
- m_retrieved_record.clear();
+ m_retrieved_record.Reset();
break;
default:
break;
@@ -10620,24 +11043,21 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
std::unordered_map<rocksdb::ColumnFamilyHandle *, std::vector<rocksdb::Range>>
ranges;
std::unordered_set<GL_INDEX_ID> ids_to_check;
- std::unordered_map<GL_INDEX_ID, uint> ids_to_keyparts;
std::vector<uchar> buf(table_arg->s->keys * 2 *
Rdb_key_def::INDEX_NUMBER_SIZE);
+ std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats;
for (uint i = 0; i < table_arg->s->keys; i++) {
const auto bufp = &buf[i * 2 * Rdb_key_def::INDEX_NUMBER_SIZE];
const Rdb_key_def &kd = *m_key_descr_arr[i];
+ const GL_INDEX_ID index_id = kd.get_gl_index_id();
ranges[kd.get_cf()].push_back(get_range(i, bufp));
- ids_to_check.insert(kd.get_gl_index_id());
- ids_to_keyparts[kd.get_gl_index_id()] = kd.get_key_parts();
- }
- // for analyze statements, force flush on memtable to get accurate cardinality
- Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
- if (thd != nullptr && THDVAR(thd, flush_memtable_on_analyze) &&
- !rocksdb_pause_background_work) {
- for (auto it : ids_to_check) {
- rdb->Flush(rocksdb::FlushOptions(), cf_manager.get_cf(it.cf_id));
- }
+ ids_to_check.insert(index_id);
+ // Initialize the stats to 0. If there are no files that contain
+ // this gl_index_id, then 0 should be stored for the cached stats.
+ stats[index_id] = Rdb_index_stats(index_id);
+ DBUG_ASSERT(kd.get_key_parts() > 0);
+ stats[index_id].m_distinct_keys_per_prefix.resize(kd.get_key_parts());
}
// get RocksDB table properties for these ranges
@@ -10654,15 +11074,6 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
}
int num_sst = 0;
- // group stats per index id
- std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats;
- for (const auto &it : ids_to_check) {
- // Initialize the stats to 0. If there are no files that contain
- // this gl_index_id, then 0 should be stored for the cached stats.
- stats[it] = Rdb_index_stats(it);
- DBUG_ASSERT(ids_to_keyparts.count(it) > 0);
- stats[it].m_distinct_keys_per_prefix.resize(ids_to_keyparts[it]);
- }
for (const auto &it : props) {
std::vector<Rdb_index_stats> sst_stats;
Rdb_tbl_prop_coll::read_stats_from_tbl_props(it.second, &sst_stats);
@@ -10689,6 +11100,53 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
num_sst++;
}
+ // calculate memtable cardinality
+ Rdb_tbl_card_coll cardinality_collector(rocksdb_table_stats_sampling_pct);
+ auto read_opts = rocksdb::ReadOptions();
+ read_opts.read_tier = rocksdb::ReadTier::kMemtableTier;
+ for (uint i = 0; i < table_arg->s->keys; i++) {
+ const Rdb_key_def &kd = *m_key_descr_arr[i];
+ Rdb_index_stats &stat = stats[kd.get_gl_index_id()];
+
+ uchar r_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
+ auto r = get_range(i, r_buf);
+ uint64_t memtableCount;
+ uint64_t memtableSize;
+ rdb->GetApproximateMemTableStats(kd.get_cf(), r, &memtableCount,
+ &memtableSize);
+ if (memtableCount < (uint64_t)stat.m_rows / 10) {
+ // skip tables that already have enough stats from SST files to reduce
+ // overhead and avoid degradation of big tables stats by sampling from
+ // relatively tiny (less than 10% of full data set) memtable dataset
+ continue;
+ }
+
+ std::unique_ptr<rocksdb::Iterator> it = std::unique_ptr<rocksdb::Iterator>(
+ rdb->NewIterator(read_opts, kd.get_cf()));
+
+ uchar *first_key;
+ uint key_size;
+ if (is_pk(i, table, m_tbl_def)) {
+ first_key = m_pk_packed_tuple;
+ } else {
+ first_key = m_sk_packed_tuple;
+ }
+ kd.get_first_key(first_key, &key_size);
+ rocksdb::Slice first_index_key((const char *)first_key, key_size);
+
+ cardinality_collector.Reset();
+ for (it->Seek(first_index_key); is_valid(it.get()); it->Next()) {
+ const rocksdb::Slice key = it->key();
+ if (!kd.covers_key(key)) {
+ break; // end of this index
+ }
+ stat.m_rows++;
+
+ cardinality_collector.ProcessKey(key, &kd, &stat);
+ }
+ cardinality_collector.AdjustStats(&stat);
+ }
+
// set and persist new stats
ddl_manager.set_stats(stats);
ddl_manager.persist_stats(true);
@@ -10736,32 +11194,73 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
off = 1;
}
- longlong new_val;
+ Field *field;
+ ulonglong new_val, max_val;
+ field = table->key_info[table->s->next_number_index].key_part[0].field;
+ max_val = rdb_get_int_col_max_value(field);
// Local variable reference to simplify code below
- std::atomic<longlong> &auto_incr = m_tbl_def->m_auto_incr_val;
+ auto &auto_incr = m_tbl_def->m_auto_incr_val;
- if (inc == 1 && off == 1) {
+ if (inc == 1) {
+ DBUG_ASSERT(off == 1);
// Optimization for the standard case where we are always simply
// incrementing from the last position
// Use CAS operation in a loop to make sure automically get the next auto
- // increment value while ensuring tha we don't wrap around to a negative
+ // increment value while ensuring that we don't wrap around to a negative
// number.
+ //
+ // We set auto_incr to the min of max_val and new_val + 1. This means that
+ // if we're at the maximum, we should be returning the same value for
+ // multiple rows, resulting in duplicate key errors (as expected).
+ //
+ // If we return values greater than the max, the SQL layer will "truncate"
+ // the value anyway, but it means that we store invalid values into
+ // auto_incr that will be visible in SHOW CREATE TABLE.
new_val = auto_incr;
- while (new_val != std::numeric_limits<longlong>::max()) {
- if (auto_incr.compare_exchange_weak(new_val, new_val + 1)) {
+ while (new_val != std::numeric_limits<ulonglong>::max()) {
+ if (auto_incr.compare_exchange_weak(new_val,
+ std::min(new_val + 1, max_val))) {
break;
}
}
} else {
- // The next value can be more complicated if either `inc` or 'off' is not 1
- longlong last_val = auto_incr;
+ // The next value can be more complicated if either 'inc' or 'off' is not 1
+ ulonglong last_val = auto_incr;
// Loop until we can correctly update the atomic value
do {
- if (((last_val - off) / inc) ==
- (std::numeric_limits<longlong>::max() - off) / inc) {
+ DBUG_ASSERT(last_val > 0);
+ // Calculate the next value in the auto increment series: offset
+ // + N * increment where N is 0, 1, 2, ...
+ //
+ // For further information please visit:
+ // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
+ //
+ // The following is confusing so here is an explanation:
+ // To get the next number in the sequence above you subtract out the
+ // offset, calculate the next sequence (N * increment) and then add the
+ // offset back in.
+ //
+ // The additions are rearranged to avoid overflow. The following is
+ // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact
+ // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why:
+ //
+ // (a+b)/c
+ // = (a - a%c + a%c + b - b%c + b%c) / c
+ // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c
+ // = a/c + b/c + (a%c + b%c) / c
+ //
+ // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the
+ // following statement.
+ ulonglong n =
+ (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc;
+
+ // Check if n * inc + off will overflow. This can only happen if we have
+ // an UNSIGNED BIGINT field.
+ if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) {
+ DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max());
// The 'last_val' value is already equal to or larger than the largest
// value in the sequence. Continuing would wrap around (technically
// the behavior would be undefined). What should we do?
@@ -10773,31 +11272,30 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
// may not be in our sequence, but it is guaranteed to be equal
// to or larger than any other value already inserted.
//
- // For now I'm going to take option @2.
- new_val = std::numeric_limits<longlong>::max();
+ // For now I'm going to take option 2.
+ //
+ // Returning ULLONG_MAX from get_auto_increment will cause the SQL
+ // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to
+ // the SE API for get_auto_increment, inserts will fail with
+ // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but
+ // inserts will fail with ER_DUP_ENTRY for other types (or no failure
+ // if the column is in a non-unique SK).
+ new_val = std::numeric_limits<ulonglong>::max();
auto_incr = new_val; // Store the largest value into auto_incr
break;
}
- // Calculate the next value in the auto increment series:
- // offset + N * increment
- // where N is 0, 1, 2, ...
- //
- // For further information please visit:
- // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
- //
- // The following is confusing so here is an explanation:
- // To get the next number in the sequence above you subtract out
- // the offset, calculate the next sequence (N * increment) and then add
- // the offset back in.
- new_val = (((last_val - off) + (inc - 1)) / inc) * inc + off;
+ new_val = n * inc + off;
// Attempt to store the new value (plus 1 since m_auto_incr_val contains
// the next available value) into the atomic value. If the current
// value no longer matches what we have in 'last_val' this will fail and
// we will repeat the loop (`last_val` will automatically get updated
// with the current value).
- } while (!auto_incr.compare_exchange_weak(last_val, new_val + 1));
+ //
+ // See above explanation for inc == 1 for why we use std::min.
+ } while (!auto_incr.compare_exchange_weak(last_val,
+ std::min(new_val + 1, max_val)));
}
*first_value = new_val;
@@ -10874,7 +11372,8 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter(
my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
my_core::Alter_inplace_info::ADD_INDEX |
my_core::Alter_inplace_info::ALTER_PARTITIONED |
- my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) {
DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
}
@@ -10885,6 +11384,13 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter(
DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
}
+ /* We only support changing auto_increment for table options. */
+ if ((ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) &&
+ !(ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) {
+ DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
DBUG_RETURN(my_core::HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
}
@@ -10924,111 +11430,121 @@ bool ha_rocksdb::prepare_inplace_alter_table(
DBUG_ASSERT(altered_table != nullptr);
DBUG_ASSERT(ha_alter_info != nullptr);
- const uint old_n_keys = m_tbl_def->m_key_count;
+ Rdb_tbl_def *new_tdef = nullptr;
+ std::shared_ptr<Rdb_key_def> *old_key_descr = nullptr;
+ std::shared_ptr<Rdb_key_def> *new_key_descr = nullptr;
+ uint old_n_keys = m_tbl_def->m_key_count;
uint new_n_keys = altered_table->s->keys;
+ std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes;
+ std::unordered_set<GL_INDEX_ID> dropped_index_ids;
+ uint n_dropped_keys = 0;
+ uint n_added_keys = 0;
+ ulonglong max_auto_incr = 0;
- if (has_hidden_pk(altered_table)) {
- new_n_keys += 1;
- }
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::DROP_INDEX |
+ my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
+ my_core::Alter_inplace_info::ADD_INDEX |
+ my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
- const TABLE *const old_table = table;
- std::shared_ptr<Rdb_key_def> *const old_key_descr =
- m_tbl_def->m_key_descr_arr;
- std::shared_ptr<Rdb_key_def> *const new_key_descr =
- new std::shared_ptr<Rdb_key_def>[new_n_keys];
+ if (has_hidden_pk(altered_table)) {
+ new_n_keys += 1;
+ }
- Rdb_tbl_def *const new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename());
- new_tdef->m_key_descr_arr = new_key_descr;
- new_tdef->m_key_count = new_n_keys;
- new_tdef->m_auto_incr_val =
- m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
- new_tdef->m_hidden_pk_val =
- m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ const TABLE *const old_table = table;
+ old_key_descr = m_tbl_def->m_key_descr_arr;
+ new_key_descr = new std::shared_ptr<Rdb_key_def>[new_n_keys];
- if (ha_alter_info->handler_flags &
- (my_core::Alter_inplace_info::DROP_INDEX |
- my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
- my_core::Alter_inplace_info::ADD_INDEX |
- my_core::Alter_inplace_info::ADD_UNIQUE_INDEX) &&
- create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
- /* Delete the new key descriptors */
- delete[] new_key_descr;
+ new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename());
+ new_tdef->m_key_descr_arr = new_key_descr;
+ new_tdef->m_key_count = new_n_keys;
+ new_tdef->m_auto_incr_val =
+ m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
+ new_tdef->m_hidden_pk_val =
+ m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
- /*
- Explicitly mark as nullptr so we don't accidentally remove entries
- from data dictionary on cleanup (or cause double delete[]).
- */
- new_tdef->m_key_descr_arr = nullptr;
- delete new_tdef;
+ if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
+ /* Delete the new key descriptors */
+ delete[] new_key_descr;
- my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
- DBUG_RETURN(HA_EXIT_FAILURE);
- }
+ /*
+ Explicitly mark as nullptr so we don't accidentally remove entries
+ from data dictionary on cleanup (or cause double delete[]).
+ */
+ new_tdef->m_key_descr_arr = nullptr;
+ delete new_tdef;
- std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes;
- std::unordered_set<GL_INDEX_ID> dropped_index_ids;
+ my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
- uint i;
- uint j;
+ uint i;
+ uint j;
- /* Determine which(if any) key definition(s) need to be dropped */
- for (i = 0; i < ha_alter_info->index_drop_count; i++) {
- const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i];
- for (j = 0; j < old_n_keys; j++) {
- const KEY *const old_key =
- &old_table->key_info[old_key_descr[j]->get_keyno()];
+ /* Determine which(if any) key definition(s) need to be dropped */
+ for (i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i];
+ for (j = 0; j < old_n_keys; j++) {
+ const KEY *const old_key =
+ &old_table->key_info[old_key_descr[j]->get_keyno()];
- if (!compare_keys(old_key, dropped_key)) {
- dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id());
- break;
+ if (!compare_keys(old_key, dropped_key)) {
+ dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id());
+ break;
+ }
}
}
- }
- /* Determine which(if any) key definitions(s) need to be added */
- int identical_indexes_found = 0;
- for (i = 0; i < ha_alter_info->index_add_count; i++) {
- const KEY *const added_key =
- &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
- for (j = 0; j < new_n_keys; j++) {
- const KEY *const new_key =
- &altered_table->key_info[new_key_descr[j]->get_keyno()];
- if (!compare_keys(new_key, added_key)) {
- /*
- Check for cases where an 'identical' index is being dropped and
- re-added in a single ALTER statement. Turn this into a no-op as the
- index has not changed.
-
- E.G. Unique index -> non-unique index requires no change
+ /* Determine which(if any) key definitions(s) need to be added */
+ int identical_indexes_found = 0;
+ for (i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY *const added_key =
+ &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
+ for (j = 0; j < new_n_keys; j++) {
+ const KEY *const new_key =
+ &altered_table->key_info[new_key_descr[j]->get_keyno()];
+ if (!compare_keys(new_key, added_key)) {
+ /*
+ Check for cases where an 'identical' index is being dropped and
+ re-added in a single ALTER statement. Turn this into a no-op as the
+ index has not changed.
+
+ E.G. Unique index -> non-unique index requires no change
+
+ Note that cases where the index name remains the same but the
+ key-parts are changed is already handled in create_inplace_key_defs.
+ In these cases the index needs to be rebuilt.
+ */
+ if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) {
+ dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id());
+ identical_indexes_found++;
+ } else {
+ added_indexes.insert(new_key_descr[j]);
+ }
- Note that cases where the index name remains the same but the
- key-parts are changed is already handled in create_inplace_key_defs.
- In these cases the index needs to be rebuilt.
- */
- if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) {
- dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id());
- identical_indexes_found++;
- } else {
- added_indexes.insert(new_key_descr[j]);
+ break;
}
-
- break;
}
}
- }
- const uint n_dropped_keys =
- ha_alter_info->index_drop_count - identical_indexes_found;
- const uint n_added_keys =
- ha_alter_info->index_add_count - identical_indexes_found;
- DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys);
- DBUG_ASSERT(added_indexes.size() == n_added_keys);
- DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys));
+ n_dropped_keys = ha_alter_info->index_drop_count - identical_indexes_found;
+ n_added_keys = ha_alter_info->index_add_count - identical_indexes_found;
+ DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys);
+ DBUG_ASSERT(added_indexes.size() == n_added_keys);
+ DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys));
+ }
+ if (ha_alter_info->handler_flags &
+ my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ if (!new_tdef) {
+ new_tdef = m_tbl_def;
+ }
+ max_auto_incr = load_auto_incr_value_from_index();
+ }
ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx(
new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys,
- added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys);
-
+ added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys,
+ max_auto_incr);
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -11182,9 +11698,10 @@ int ha_rocksdb::inplace_populate_sk(
bool is_unique_index =
new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME;
- Rdb_index_merge rdb_merge(
- thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size,
- rdb_merge_tmp_file_removal_delay, index->get_cf());
+ Rdb_index_merge rdb_merge(tx->get_rocksdb_tmpdir(), rdb_merge_buf_size,
+ rdb_merge_combine_read_size,
+ rdb_merge_tmp_file_removal_delay,
+ index->get_cf());
if ((res = rdb_merge.init())) {
DBUG_RETURN(res);
@@ -11488,6 +12005,29 @@ bool ha_rocksdb::commit_inplace_alter_table(
rdb_drop_idx_thread.signal();
}
+ if (ha_alter_info->handler_flags &
+ (my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) {
+ const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+ std::unordered_set<GL_INDEX_ID> create_index_ids;
+
+ ulonglong auto_incr_val = ha_alter_info->create_info->auto_increment_value;
+
+ for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) {
+ Rdb_inplace_alter_ctx *const ctx =
+ static_cast<Rdb_inplace_alter_ctx *>(*pctx);
+ auto_incr_val = std::max(auto_incr_val, ctx->m_max_auto_incr);
+ dict_manager.put_auto_incr_val(
+ batch, ctx->m_new_tdef->get_autoincr_gl_index_id(), auto_incr_val,
+ true /* overwrite */);
+ ctx->m_new_tdef->m_auto_incr_val = auto_incr_val;
+ }
+
+ if (dict_manager.commit(batch)) {
+ DBUG_ASSERT(0);
+ }
+ }
+
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -11515,15 +12055,29 @@ struct rocksdb_status_counters_t {
uint64_t block_cache_miss;
uint64_t block_cache_hit;
uint64_t block_cache_add;
+ uint64_t block_cache_add_failures;
uint64_t block_cache_index_miss;
uint64_t block_cache_index_hit;
+ uint64_t block_cache_index_add;
+ uint64_t block_cache_index_bytes_insert;
+ uint64_t block_cache_index_bytes_evict;
uint64_t block_cache_filter_miss;
uint64_t block_cache_filter_hit;
+ uint64_t block_cache_filter_add;
+ uint64_t block_cache_filter_bytes_insert;
+ uint64_t block_cache_filter_bytes_evict;
+ uint64_t block_cache_bytes_read;
+ uint64_t block_cache_bytes_write;
+ uint64_t block_cache_data_bytes_insert;
uint64_t block_cache_data_miss;
uint64_t block_cache_data_hit;
+ uint64_t block_cache_data_add;
uint64_t bloom_filter_useful;
uint64_t memtable_hit;
uint64_t memtable_miss;
+ uint64_t get_hit_l0;
+ uint64_t get_hit_l1;
+ uint64_t get_hit_l2_and_up;
uint64_t compaction_key_drop_new;
uint64_t compaction_key_drop_obsolete;
uint64_t compaction_key_drop_user;
@@ -11532,11 +12086,17 @@ struct rocksdb_status_counters_t {
uint64_t number_keys_updated;
uint64_t bytes_written;
uint64_t bytes_read;
+ uint64_t number_db_seek;
+ uint64_t number_db_seek_found;
+ uint64_t number_db_next;
+ uint64_t number_db_next_found;
+ uint64_t number_db_prev;
+ uint64_t number_db_prev_found;
+ uint64_t iter_bytes_read;
uint64_t no_file_closes;
uint64_t no_file_opens;
uint64_t no_file_errors;
uint64_t stall_micros;
- uint64_t rate_limit_delay_millis;
uint64_t num_iterators;
uint64_t number_multiget_get;
uint64_t number_multiget_keys_read;
@@ -11569,15 +12129,29 @@ static rocksdb_status_counters_t rocksdb_status_counters;
DEF_SHOW_FUNC(block_cache_miss, BLOCK_CACHE_MISS)
DEF_SHOW_FUNC(block_cache_hit, BLOCK_CACHE_HIT)
DEF_SHOW_FUNC(block_cache_add, BLOCK_CACHE_ADD)
+DEF_SHOW_FUNC(block_cache_add_failures, BLOCK_CACHE_ADD_FAILURES)
DEF_SHOW_FUNC(block_cache_index_miss, BLOCK_CACHE_INDEX_MISS)
DEF_SHOW_FUNC(block_cache_index_hit, BLOCK_CACHE_INDEX_HIT)
+DEF_SHOW_FUNC(block_cache_index_add, BLOCK_CACHE_INDEX_ADD)
+DEF_SHOW_FUNC(block_cache_index_bytes_insert, BLOCK_CACHE_INDEX_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_index_bytes_evict, BLOCK_CACHE_INDEX_BYTES_EVICT)
DEF_SHOW_FUNC(block_cache_filter_miss, BLOCK_CACHE_FILTER_MISS)
DEF_SHOW_FUNC(block_cache_filter_hit, BLOCK_CACHE_FILTER_HIT)
+DEF_SHOW_FUNC(block_cache_filter_add, BLOCK_CACHE_FILTER_ADD)
+DEF_SHOW_FUNC(block_cache_filter_bytes_insert, BLOCK_CACHE_FILTER_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_filter_bytes_evict, BLOCK_CACHE_FILTER_BYTES_EVICT)
+DEF_SHOW_FUNC(block_cache_bytes_read, BLOCK_CACHE_BYTES_READ)
+DEF_SHOW_FUNC(block_cache_bytes_write, BLOCK_CACHE_BYTES_WRITE)
+DEF_SHOW_FUNC(block_cache_data_bytes_insert, BLOCK_CACHE_DATA_BYTES_INSERT)
DEF_SHOW_FUNC(block_cache_data_miss, BLOCK_CACHE_DATA_MISS)
DEF_SHOW_FUNC(block_cache_data_hit, BLOCK_CACHE_DATA_HIT)
+DEF_SHOW_FUNC(block_cache_data_add, BLOCK_CACHE_DATA_ADD)
DEF_SHOW_FUNC(bloom_filter_useful, BLOOM_FILTER_USEFUL)
DEF_SHOW_FUNC(memtable_hit, MEMTABLE_HIT)
DEF_SHOW_FUNC(memtable_miss, MEMTABLE_MISS)
+DEF_SHOW_FUNC(get_hit_l0, GET_HIT_L0)
+DEF_SHOW_FUNC(get_hit_l1, GET_HIT_L1)
+DEF_SHOW_FUNC(get_hit_l2_and_up, GET_HIT_L2_AND_UP)
DEF_SHOW_FUNC(compaction_key_drop_new, COMPACTION_KEY_DROP_NEWER_ENTRY)
DEF_SHOW_FUNC(compaction_key_drop_obsolete, COMPACTION_KEY_DROP_OBSOLETE)
DEF_SHOW_FUNC(compaction_key_drop_user, COMPACTION_KEY_DROP_USER)
@@ -11586,11 +12160,17 @@ DEF_SHOW_FUNC(number_keys_read, NUMBER_KEYS_READ)
DEF_SHOW_FUNC(number_keys_updated, NUMBER_KEYS_UPDATED)
DEF_SHOW_FUNC(bytes_written, BYTES_WRITTEN)
DEF_SHOW_FUNC(bytes_read, BYTES_READ)
+DEF_SHOW_FUNC(number_db_seek, NUMBER_DB_SEEK)
+DEF_SHOW_FUNC(number_db_seek_found, NUMBER_DB_SEEK_FOUND)
+DEF_SHOW_FUNC(number_db_next, NUMBER_DB_NEXT)
+DEF_SHOW_FUNC(number_db_next_found, NUMBER_DB_NEXT_FOUND)
+DEF_SHOW_FUNC(number_db_prev, NUMBER_DB_PREV)
+DEF_SHOW_FUNC(number_db_prev_found, NUMBER_DB_PREV_FOUND)
+DEF_SHOW_FUNC(iter_bytes_read, ITER_BYTES_READ)
DEF_SHOW_FUNC(no_file_closes, NO_FILE_CLOSES)
DEF_SHOW_FUNC(no_file_opens, NO_FILE_OPENS)
DEF_SHOW_FUNC(no_file_errors, NO_FILE_ERRORS)
DEF_SHOW_FUNC(stall_micros, STALL_MICROS)
-DEF_SHOW_FUNC(rate_limit_delay_millis, RATE_LIMIT_DELAY_MILLIS)
DEF_SHOW_FUNC(num_iterators, NO_ITERATORS)
DEF_SHOW_FUNC(number_multiget_get, NUMBER_MULTIGET_CALLS)
DEF_SHOW_FUNC(number_multiget_keys_read, NUMBER_MULTIGET_KEYS_READ)
@@ -11624,6 +12204,7 @@ static void myrocks_update_status() {
export_stats.rows_updated = global_stats.rows[ROWS_UPDATED];
export_stats.rows_deleted_blind = global_stats.rows[ROWS_DELETED_BLIND];
export_stats.rows_expired = global_stats.rows[ROWS_EXPIRED];
+ export_stats.rows_filtered = global_stats.rows[ROWS_FILTERED];
export_stats.system_rows_deleted = global_stats.system_rows[ROWS_DELETED];
export_stats.system_rows_inserted = global_stats.system_rows[ROWS_INSERTED];
@@ -11662,6 +12243,8 @@ static SHOW_VAR myrocks_status_variables[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("rows_expired", &export_stats.rows_expired,
SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_filtered", &export_stats.rows_filtered,
+ SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("system_rows_deleted",
&export_stats.system_rows_deleted, SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("system_rows_inserted",
@@ -11780,15 +12363,29 @@ static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(block_cache_miss),
DEF_STATUS_VAR(block_cache_hit),
DEF_STATUS_VAR(block_cache_add),
+ DEF_STATUS_VAR(block_cache_add_failures),
DEF_STATUS_VAR(block_cache_index_miss),
DEF_STATUS_VAR(block_cache_index_hit),
+ DEF_STATUS_VAR(block_cache_index_add),
+ DEF_STATUS_VAR(block_cache_index_bytes_insert),
+ DEF_STATUS_VAR(block_cache_index_bytes_evict),
DEF_STATUS_VAR(block_cache_filter_miss),
DEF_STATUS_VAR(block_cache_filter_hit),
+ DEF_STATUS_VAR(block_cache_filter_add),
+ DEF_STATUS_VAR(block_cache_filter_bytes_insert),
+ DEF_STATUS_VAR(block_cache_filter_bytes_evict),
+ DEF_STATUS_VAR(block_cache_bytes_read),
+ DEF_STATUS_VAR(block_cache_bytes_write),
+ DEF_STATUS_VAR(block_cache_data_bytes_insert),
DEF_STATUS_VAR(block_cache_data_miss),
DEF_STATUS_VAR(block_cache_data_hit),
+ DEF_STATUS_VAR(block_cache_data_add),
DEF_STATUS_VAR(bloom_filter_useful),
DEF_STATUS_VAR(memtable_hit),
DEF_STATUS_VAR(memtable_miss),
+ DEF_STATUS_VAR(get_hit_l0),
+ DEF_STATUS_VAR(get_hit_l1),
+ DEF_STATUS_VAR(get_hit_l2_and_up),
DEF_STATUS_VAR(compaction_key_drop_new),
DEF_STATUS_VAR(compaction_key_drop_obsolete),
DEF_STATUS_VAR(compaction_key_drop_user),
@@ -11797,11 +12394,17 @@ static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(number_keys_updated),
DEF_STATUS_VAR(bytes_written),
DEF_STATUS_VAR(bytes_read),
+ DEF_STATUS_VAR(number_db_seek),
+ DEF_STATUS_VAR(number_db_seek_found),
+ DEF_STATUS_VAR(number_db_next),
+ DEF_STATUS_VAR(number_db_next_found),
+ DEF_STATUS_VAR(number_db_prev),
+ DEF_STATUS_VAR(number_db_prev_found),
+ DEF_STATUS_VAR(iter_bytes_read),
DEF_STATUS_VAR(no_file_closes),
DEF_STATUS_VAR(no_file_opens),
DEF_STATUS_VAR(no_file_errors),
DEF_STATUS_VAR(stall_micros),
- DEF_STATUS_VAR(rate_limit_delay_millis),
DEF_STATUS_VAR(num_iterators),
DEF_STATUS_VAR(number_multiget_get),
DEF_STATUS_VAR(number_multiget_keys_read),
@@ -11827,12 +12430,14 @@ static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(number_superversion_releases),
DEF_STATUS_VAR(number_superversion_cleanups),
DEF_STATUS_VAR(number_block_not_compressed),
+ DEF_STATUS_VAR_PTR("row_lock_deadlocks", &rocksdb_row_lock_deadlocks,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_PTR("row_lock_wait_timeouts",
+ &rocksdb_row_lock_wait_timeouts, SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("snapshot_conflict_errors",
&rocksdb_snapshot_conflict_errors, SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("wal_group_syncs", &rocksdb_wal_group_syncs,
SHOW_LONGLONG),
- DEF_STATUS_VAR_PTR("number_stat_computes", &rocksdb_number_stat_computes,
- SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put,
SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete,
@@ -11899,8 +12504,8 @@ void Rdb_background_thread::run() {
// InnoDB's behavior. For mode never, the wal file isn't even written,
// whereas background writes to the wal file, but issues the syncs in a
// background thread.
- if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) {
- DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes);
+ if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) &&
+ !rocksdb_db_options->allow_mmap_writes) {
const rocksdb::Status s = rdb->FlushWAL(true);
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
@@ -11912,6 +12517,16 @@ void Rdb_background_thread::run() {
ddl_manager.persist_stats();
}
+/*
+  Ask can_use_bloom_filter() whether the bloom filter can serve this lookup.
+  When it cannot, call setup_iterator_bounds() for the equality condition.
+
+  @return the answer given by can_use_bloom_filter() for these arguments.
+*/
+bool ha_rocksdb::check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd,
+                                            const rocksdb::Slice &eq_cond,
+                                            const bool use_all_keys) {
+  if (can_use_bloom_filter(thd, kd, eq_cond, use_all_keys)) {
+    return true;
+  }
+
+  // Bloom filter is not usable here: set up iterator bounds instead.
+  setup_iterator_bounds(kd, eq_cond);
+  return false;
+}
+
/**
Deciding if it is possible to use bloom filter or not.
@@ -11930,9 +12545,9 @@ void Rdb_background_thread::run() {
@param use_all_keys True if all key parts are set with equal conditions.
This is aware of extended keys.
*/
-bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
- const rocksdb::Slice &eq_cond,
- const bool use_all_keys) {
+bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys) {
bool can_use = false;
if (THDVAR(thd, skip_bloom_filter_on_read)) {
@@ -12070,7 +12685,7 @@ void rdb_handle_io_error(const rocksdb::Status status,
rdb_log_status_error(status, "failed to write to WAL");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on WAL write error.");
- abort_with_stack_traces();
+ abort();
break;
}
case RDB_IO_ERROR_BG_THREAD: {
@@ -12081,7 +12696,7 @@ void rdb_handle_io_error(const rocksdb::Status status,
rdb_log_status_error(status, "failed on I/O");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on I/O error.");
- abort_with_stack_traces();
+ abort();
break;
}
default:
@@ -12090,16 +12705,17 @@ void rdb_handle_io_error(const rocksdb::Status status,
}
} else if (status.IsCorruption()) {
rdb_log_status_error(status, "data corruption detected!");
+ rdb_persist_corruption_marker();
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting because of data corruption.");
- abort_with_stack_traces();
+ abort();
} else if (!status.ok()) {
switch (err_type) {
case RDB_IO_ERROR_DICT_COMMIT: {
rdb_log_status_error(status, "Failed to write to WAL (dictionary)");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on WAL write error.");
- abort_with_stack_traces();
+ abort();
break;
}
default:
@@ -12256,11 +12872,43 @@ void rocksdb_set_collation_exception_list(THD *const thd,
*static_cast<const char**>(var_ptr) = val_copy;
}
-void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
- MY_ATTRIBUTE((__unused__)),
- void *const var_ptr, const void *const save) {
- Rdb_transaction *&tx = get_tx_from_thd(thd);
+/*
+  Convert a system-variable value into a boolean.
+
+  String values: "true"/"on" give TRUE and "false"/"off" give FALSE, compared
+  with my_strcasecmp() under system_charset_info; any other string (or a
+  NULL string) is rejected.
+  Integer values: anything greater than 1 is rejected; otherwise a positive
+  value gives TRUE and zero or a negative value gives FALSE.
+  Any other value type is rejected.
+
+  @param value         [IN]  value to convert
+  @param return_value  [OUT] written only when the conversion succeeds
+  @return 0 on success, 1 when the value is rejected
+*/
+int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) {
+  const int kind = value->value_type(value);
+
+  if (kind == MYSQL_VALUE_TYPE_STRING) {
+    char text_buf[16];
+    int text_len = sizeof(text_buf);
+    const char *const text = value->val_str(value, text_buf, &text_len);
+    if (text == nullptr) {
+      return 1;
+    }
+    if (my_strcasecmp(system_charset_info, "true", text) == 0 ||
+        my_strcasecmp(system_charset_info, "on", text) == 0) {
+      *return_value = TRUE;
+      return 0;
+    }
+    if (my_strcasecmp(system_charset_info, "false", text) == 0 ||
+        my_strcasecmp(system_charset_info, "off", text) == 0) {
+      *return_value = FALSE;
+      return 0;
+    }
+    return 1;
+  }
+
+  if (kind == MYSQL_VALUE_TYPE_INT) {
+    long long number;
+    value->val_int(value, &number);
+    if (number > 1) {
+      return 1;
+    }
+    *return_value = number > 0 ? TRUE : FALSE;
+    return 0;
+  }
+
+  return 1;
+}
+int rocksdb_check_bulk_load(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value) {
+ my_bool new_value;
+ if (mysql_value_to_bool(value, &new_value) != 0) {
+ return 1;
+ }
+
+ Rdb_transaction *&tx = get_tx_from_thd(thd);
if (tx != nullptr) {
const int rc = tx->finish_bulk_load();
if (rc != 0) {
@@ -12268,30 +12916,32 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
sql_print_error("RocksDB: Error %d finalizing last SST file while "
"setting bulk loading variable",
rc);
- /*
- MariaDB doesn't do the following:
- abort_with_stack_traces();
- because it doesn't seem a good idea to crash a server when a user makes
- a mistake.
- Instead, we return an error to the user. The error has already been
- produced inside ha_rocksdb::finalize_bulk_load().
- */
+ THDVAR(thd, bulk_load) = 0;
+ return 1;
}
}
- *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+ *static_cast<bool *>(save) = new_value;
+ return 0;
}
-void rocksdb_set_bulk_load_allow_unsorted(
- THD *const thd,
- struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
- void *const var_ptr, const void *const save) {
+int rocksdb_check_bulk_load_allow_unsorted(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value) {
+ my_bool new_value;
+ if (mysql_value_to_bool(value, &new_value) != 0) {
+ return 1;
+ }
+
if (THDVAR(thd, bulk_load)) {
my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET",
"Cannot change this setting while bulk load is enabled");
- } else {
- *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+
+ return 1;
}
+
+ *static_cast<bool *>(save) = new_value;
+ return 0;
}
static void rocksdb_set_max_background_jobs(THD *thd,
@@ -12322,35 +12972,116 @@ static void rocksdb_set_max_background_jobs(THD *thd,
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
-void rocksdb_set_update_cf_options(THD *const /* unused */,
- struct st_mysql_sys_var *const /* unused */,
- void *const var_ptr,
- const void *const save) {
+/*
+  Update handler for the bytes_per_sync setting.
+
+  Reads the new value from `save`. When it differs from the value held in
+  rocksdb_db_options, the new value is stored there and then forwarded to the
+  running database through SetDBOptions(). The cached value is updated before
+  the call and is not reverted if SetDBOptions() fails; the failure is only
+  logged as a warning. The whole update runs under rdb_sysvars_mutex.
+
+  @param save  [IN] pointer to the new value, read as ulonglong
+*/
+static void rocksdb_set_bytes_per_sync(
+    THD *thd MY_ATTRIBUTE((__unused__)),
+    struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+    void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+  DBUG_ASSERT(save != nullptr);
+  DBUG_ASSERT(rocksdb_db_options != nullptr);
+  DBUG_ASSERT(rocksdb_db_options->env != nullptr);
+
+  RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+  const ulonglong new_val = *static_cast<const ulonglong *>(save);
+
+  if (rocksdb_db_options->bytes_per_sync != new_val) {
+    rocksdb_db_options->bytes_per_sync = new_val;
+    rocksdb::Status s =
+        rdb->SetDBOptions({{"bytes_per_sync", std::to_string(new_val)}});
+
+    if (!s.ok()) {
+      /* NO_LINT_DEBUG */
+      sql_print_warning("MyRocks: failed to update bytes_per_sync. "
+                        "Status code = %d, status = %s.",
+                        s.code(), s.ToString().c_str());
+    }
+  }
+
+  RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+/*
+  Update handler for the wal_bytes_per_sync setting.
+
+  Reads the new value from `save`. When it differs from the value held in
+  rocksdb_db_options, the new value is stored there and then forwarded to the
+  running database through SetDBOptions(). The cached value is updated before
+  the call and is not reverted if SetDBOptions() fails; the failure is only
+  logged as a warning. The whole update runs under rdb_sysvars_mutex.
+
+  @param save  [IN] pointer to the new value, read as ulonglong
+*/
+static void rocksdb_set_wal_bytes_per_sync(
+    THD *thd MY_ATTRIBUTE((__unused__)),
+    struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+    void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+  DBUG_ASSERT(save != nullptr);
+  DBUG_ASSERT(rocksdb_db_options != nullptr);
+  DBUG_ASSERT(rocksdb_db_options->env != nullptr);
+
+  RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+  const ulonglong new_val = *static_cast<const ulonglong *>(save);
+
+  if (rocksdb_db_options->wal_bytes_per_sync != new_val) {
+    rocksdb_db_options->wal_bytes_per_sync = new_val;
+    rocksdb::Status s =
+        rdb->SetDBOptions({{"wal_bytes_per_sync", std::to_string(new_val)}});
+
+    if (!s.ok()) {
+      /* NO_LINT_DEBUG */
+      sql_print_warning("MyRocks: failed to update wal_bytes_per_sync. "
+                        "Status code = %d, status = %s.",
+                        s.code(), s.ToString().c_str());
+    }
+  }
+
+  RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+/*
+  Validation handler for rocksdb_update_cf_options.
+
+  Fetches the proposed value as a string, publishes it through `save`, and
+  checks that Rdb_cf_options::parse_cf_options() accepts it. A NULL value is
+  accepted without parsing.
+
+  @param thd    [IN]  current session; its allocator holds the string copy
+  @param save   [OUT] receives the string to be applied (or nullptr)
+  @param value  [IN]  proposed value
+  @return HA_EXIT_SUCCESS when the value is NULL or parses;
+          HA_EXIT_FAILURE when the copy cannot be made or the text does not
+          parse (ER_WRONG_VALUE_FOR_VAR is raised in the latter case)
+*/
+static int
+rocksdb_validate_update_cf_options(THD *thd,
+                                   struct st_mysql_sys_var * /*unused*/,
+                                   void *save, struct st_mysql_value *value) {
+
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  int length = sizeof(buff);
+  const char *str = value->val_str(value, buff, &length);
+
+  if (str == nullptr) {
+    *(const char **)save = nullptr;
+    return HA_EXIT_SUCCESS;
+  }
+
+  // `buff` is handed to val_str() as scratch space, so the returned pointer
+  // may refer to this stack array, which no longer exists once we return.
+  // Copy the text into session-allocated memory before exposing it through
+  // `save`, as the server's own string-variable check function does.
+  str = thd->strmake(str, static_cast<size_t>(length));
+  *(const char **)save = str;
+  if (str == nullptr) {
+    return HA_EXIT_FAILURE;
+  }
+
+  Rdb_cf_options::Name_to_config_t option_map;
+
+  // Basic sanity checking and parsing the options into a map. If this fails
+  // then there's no point to proceed.
+  if (!Rdb_cf_options::parse_cf_options(str, &option_map)) {
+    my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str);
+    return HA_EXIT_FAILURE;
+  }
+  return HA_EXIT_SUCCESS;
+}
+
+static void
+rocksdb_set_update_cf_options(THD *const /* unused */,
+ struct st_mysql_sys_var *const /* unused */,
+ void *const var_ptr, const void *const save) {
const char *const val = *static_cast<const char *const *>(save);
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
if (!val) {
- // NO_LINT_DEBUG
- sql_print_warning("MyRocks: NULL is not a valid option for updates to "
- "column family settings.");
+ *reinterpret_cast<char **>(var_ptr) = nullptr;
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
return;
}
- RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
-
DBUG_ASSERT(val != nullptr);
+ // Reset the pointers regardless of how much success we had with updating
+ // the CF options. This will result in consistent behavior and avoids
+ // dealing with cases when only a subset of CF-s was successfully updated.
+ *reinterpret_cast<char **>(var_ptr) = my_strdup(val, MYF(0));
+
// Do the real work of applying the changes.
Rdb_cf_options::Name_to_config_t option_map;
- // Basic sanity checking and parsing the options into a map. If this fails
- // then there's no point to proceed.
+ // This should never fail, because of rocksdb_validate_update_cf_options
if (!Rdb_cf_options::parse_cf_options(val, &option_map)) {
my_free(*reinterpret_cast<char**>(var_ptr));
- *reinterpret_cast<char**>(var_ptr) = nullptr;
-
- // NO_LINT_DEBUG
- sql_print_warning("MyRocks: failed to parse the updated column family "
- "options = '%s'.", val);
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
return;
}
@@ -12410,16 +13141,7 @@ void rocksdb_set_update_cf_options(THD *const /* unused */,
}
}
- // Reset the pointers regardless of how much success we had with updating
- // the CF options. This will results in consistent behavior and avoids
- // dealing with cases when only a subset of CF-s was successfully updated.
- if (val) {
my_free(*reinterpret_cast<char**>(var_ptr));
- *reinterpret_cast<char**>(var_ptr) = my_strdup(val, MYF(0));
- } else {
- *reinterpret_cast<char**>(var_ptr) = nullptr;
- }
-
// Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to
// free up resources used before.
@@ -12488,6 +13210,12 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
DBUG_RETURN((rows / 20.0) + 1);
}
+/*
+  Build the path of the corruption marker file: "ROCKSDB_CORRUPTED" placed
+  directly under rocksdb_datadir.
+*/
+std::string rdb_corruption_marker_file_name() {
+  return std::string(rocksdb_datadir) + "/ROCKSDB_CORRUPTED";
+}
+
} // namespace myrocks
/**
@@ -12533,5 +13261,6 @@ maria_declare_plugin(rocksdb_se){
myrocks::rdb_i_s_cfoptions, myrocks::rdb_i_s_compact_stats,
myrocks::rdb_i_s_global_info, myrocks::rdb_i_s_ddl,
myrocks::rdb_i_s_index_file_map, myrocks::rdb_i_s_lock_info,
- myrocks::rdb_i_s_trx_info
+ myrocks::rdb_i_s_trx_info,
+ myrocks::rdb_i_s_deadlock_info
maria_declare_plugin_end;
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 4432a4de8d1..8e4434efa8f 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -41,6 +41,7 @@
/* RocksDB header files */
#include "rocksdb/cache.h"
+#include "rocksdb/merge_operator.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/sst_file_manager.h"
#include "rocksdb/statistics.h"
@@ -93,6 +94,25 @@ struct Rdb_trx_info {
std::vector<Rdb_trx_info> rdb_get_all_trx_info();
/*
+ * class for exporting deadlock transaction information for
+ * information_schema.rocksdb_deadlock
+ */
+struct Rdb_deadlock_info {
+  /*
+    Details recorded for a single transaction.
+    NOTE(review): member meanings are inferred from their names only; confirm
+    against the code that fills this structure.
+  */
+  struct Rdb_dl_trx_info {
+    ulonglong trx_id;
+    std::string cf_name;
+    std::string waiting_key;
+    bool exclusive_lock;
+    std::string index_name;
+    std::string table_name;
+  };
+
+  /* Transaction entries making up this record. */
+  std::vector<Rdb_dl_trx_info> path;
+  /* Presumably the id of the transaction chosen as victim -- TODO confirm. */
+  ulonglong victim_trx_id;
+};
+
+std::vector<Rdb_deadlock_info> rdb_get_deadlock_info();
+
+/*
This is
- the name of the default Column Family (the CF which stores indexes which
didn't explicitly specify which CF they are in)
@@ -250,6 +270,8 @@ enum collations_used {
*/
#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
+#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong)
+
/*
Maximum index prefix length in bytes.
*/
@@ -367,6 +389,7 @@ enum operation_type : int {
ROWS_UPDATED,
ROWS_DELETED_BLIND,
ROWS_EXPIRED,
+ ROWS_FILTERED,
ROWS_HIDDEN_NO_SNAPSHOT,
ROWS_MAX
};
@@ -400,6 +423,7 @@ struct st_export_stats {
ulonglong rows_updated;
ulonglong rows_deleted_blind;
ulonglong rows_expired;
+ ulonglong rows_filtered;
ulonglong rows_hidden_no_snapshot;
ulonglong system_rows_deleted;
@@ -538,6 +562,12 @@ class ha_rocksdb : public my_core::handler {
uchar *m_dup_sk_packed_tuple;
uchar *m_dup_sk_packed_tuple_old;
+ /* Buffers used for passing lower/upper bound eq conditions. */
+ uchar *m_eq_cond_lower_bound;
+ uchar *m_eq_cond_upper_bound;
+ rocksdb::Slice m_eq_cond_lower_bound_slice;
+ rocksdb::Slice m_eq_cond_upper_bound_slice;
+
/*
Temporary space for packing VARCHARs (we provide it to
pack_record()/pack_index_tuple() calls).
@@ -567,7 +597,7 @@ class ha_rocksdb : public my_core::handler {
This is used only when we get the record with rocksdb's Get() call (The
other option is when we get a rocksdb::Slice from an iterator)
*/
- std::string m_retrieved_record;
+ rocksdb::PinnableSlice m_retrieved_record;
/* Type of locking to apply to rows */
enum { RDB_LOCK_NONE, RDB_LOCK_READ, RDB_LOCK_WRITE } m_lock_rows;
@@ -600,12 +630,7 @@ class ha_rocksdb : public my_core::handler {
bool m_update_scope_is_valid;
/* SST information used for bulk loading the primary key */
- std::unique_ptr<Rdb_sst_info> m_sst_info;
- /* External merge sorts for bulk load: key ID -> merge sort instance */
- std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
- Rdb_transaction *m_bulk_load_tx;
- /* Mutex to protect finalizing bulk load */
- mysql_mutex_t m_bulk_load_mutex;
+ std::shared_ptr<Rdb_sst_info> m_sst_info;
/*
MySQL index number for duplicate key error
@@ -620,13 +645,17 @@ class ha_rocksdb : public my_core::handler {
int secondary_index_read(const int keyno, uchar *const buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void setup_iterator_for_rnd_scan();
- void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *const slice)
- MY_ATTRIBUTE((__nonnull__)) {
- setup_scan_iterator(kd, slice, false, 0);
- }
bool is_ascending(const Rdb_key_def &keydef,
enum ha_rkey_function find_flag) const
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void setup_iterator_bounds(const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond);
+ bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys);
+ bool check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd,
+ const rocksdb::Slice &eq_cond,
+ const bool use_all_keys);
void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice,
const bool use_all_keys, const uint eq_cond_len)
MY_ATTRIBUTE((__nonnull__));
@@ -635,7 +664,8 @@ class ha_rocksdb : public my_core::handler {
rocksdb::Status
get_for_update(Rdb_transaction *const tx,
rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, std::string *const value) const;
+ const rocksdb::Slice &key,
+ rocksdb::PinnableSlice *value) const;
int get_row_by_rowid(uchar *const buf, const char *const rowid,
const uint rowid_size, const bool skip_lookup = false,
@@ -649,8 +679,13 @@ class ha_rocksdb : public my_core::handler {
rowid_size, skip_lookup, skip_ttl_check);
}
- void update_auto_incr_val();
void load_auto_incr_value();
+ ulonglong load_auto_incr_value_from_index();
+ void update_auto_incr_val(ulonglong val);
+ void update_auto_incr_val_from_field();
+ rocksdb::Status get_datadic_auto_incr(Rdb_transaction *const tx,
+ const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const;
longlong update_hidden_pk_val();
int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__));
int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id)
@@ -696,6 +731,12 @@ class ha_rocksdb : public my_core::handler {
*/
std::vector<READ_FIELD> m_decoders_vect;
+ /*
+ This tells if any field which is part of the key needs to be unpacked and
+ decoded.
+ */
+ bool m_key_requested = false;
+
/* Setup field_decoders based on type of scan and table->read_set */
void setup_read_decoders();
@@ -766,9 +807,12 @@ public:
my_core::TABLE_SHARE *const table_arg);
~ha_rocksdb() {
int err MY_ATTRIBUTE((__unused__));
- err = finalize_bulk_load();
- DBUG_ASSERT(err == 0);
- mysql_mutex_destroy(&m_bulk_load_mutex);
+ err = finalize_bulk_load(false);
+ if (err != 0) {
+ sql_print_error("RocksDB: Error %d finalizing bulk load while closing "
+ "handler.",
+ err);
+ }
}
/** @brief
@@ -794,10 +838,9 @@ public:
*/
/*
- See if this is the same base table - this should only be true for different
- partitions of the same table.
+ Returns the table's base name
*/
- bool same_table(const ha_rocksdb &other) const;
+ const std::string &get_table_basename() const;
/** @brief
This is a list of flags that indicate what functionality the storage engine
@@ -1210,8 +1253,6 @@ private:
Rdb_tbl_def *get_table_if_exists(const char *const tablename)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void read_thd_vars(THD *const thd) MY_ATTRIBUTE((__nonnull__));
- const char *thd_rocksdb_tmpdir()
- MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
bool contains_foreign_key(THD *const thd)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -1221,6 +1262,9 @@ private:
const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int finalize_bulk_load(bool print_client_error = true)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
public:
int index_init(uint idx, bool sorted) override
MY_ATTRIBUTE((__warn_unused_result__));
@@ -1264,7 +1308,7 @@ public:
DBUG_ENTER_FUNC();
/* Free blob data */
- m_retrieved_record.clear();
+ m_retrieved_record.Reset();
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -1335,8 +1379,6 @@ public:
my_core::Alter_inplace_info *const ha_alter_info,
bool commit) override;
- int finalize_bulk_load() MY_ATTRIBUTE((__warn_unused_result__));
-
#ifdef MARIAROCKS_NOT_YET // MDEV-10976
void set_use_read_free_rpl(const char *const whitelist);
#endif
@@ -1391,18 +1433,22 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx {
/* Stores number of keys to drop */
const uint m_n_dropped_keys;
+ /* Stores the largest current auto increment value in the index */
+ const ulonglong m_max_auto_incr;
+
Rdb_inplace_alter_ctx(
Rdb_tbl_def *new_tdef, std::shared_ptr<Rdb_key_def> *old_key_descr,
std::shared_ptr<Rdb_key_def> *new_key_descr, uint old_n_keys,
uint new_n_keys,
std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes,
std::unordered_set<GL_INDEX_ID> dropped_index_ids, uint n_added_keys,
- uint n_dropped_keys)
+ uint n_dropped_keys, ulonglong max_auto_incr)
: my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef),
m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr),
m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys),
m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids),
- m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys) {}
+ m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys),
+ m_max_auto_incr(max_auto_incr) {}
~Rdb_inplace_alter_ctx() {}
@@ -1412,6 +1458,10 @@ private:
Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &);
};
+// file name indicating RocksDB data corruption
+std::string rdb_corruption_marker_file_name();
+
const int MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL= MariaDB_PLUGIN_MATURITY_BETA;
+
} // namespace myrocks
diff --git a/storage/rocksdb/myrocks_hotbackup b/storage/rocksdb/myrocks_hotbackup
new file mode 100755
index 00000000000..cb10bb902c0
--- /dev/null
+++ b/storage/rocksdb/myrocks_hotbackup
@@ -0,0 +1,686 @@
+#!/usr/bin/env python
+
+from __future__ import division
+from optparse import OptionParser
+import collections
+import signal
+import os
+import stat
+import sys
+import re
+import commands
+import subprocess
+import logging
+import logging.handlers
+import time
+import datetime
+import shutil
+import traceback
+import tempfile
+
+import MySQLdb
+import MySQLdb.connections
+from MySQLdb import OperationalError, ProgrammingError
+
# Module-wide logger; stays None until init_logger() installs the real one.
logger = None
# Parsed command-line options; stays None until parse_options() fills it in.
opts = None

# File-name prefixes treated as RocksDB metadata files by is_manifest().
rocksdb_files = [
  'MANIFEST',
  'CURRENT',
  'OPTIONS',
]
# File-name suffixes used to recognise RocksDB data (sst) and WAL files.
rocksdb_data_suffix = '.sst'
rocksdb_wal_suffix = '.log'

# Top-level datadir files skipped by the misc-file traversal; an entry is
# compared against both the start and the end of each file name.
exclude_files = [
  'master.info',
  'relay-log.info',
  'worker-relay-log.info',
  'auto.cnf',
  'gaplock.log',
  'ibdata',
  'ib_logfile',
  '.trash',
]

# Name of the WDT executable used for both the sender and receiver commands.
wdt_bin = 'wdt'
+
def is_manifest(fname):
  """Return True when fname starts with one of the rocksdb_files prefixes."""
  return any(fname.startswith(prefix) for prefix in rocksdb_files)
+
class Writer(object):
  """Common base class for backup writers (StreamWriter derives from it)."""
  # Class-level default, kept so that Writer.a stays readable as before.
  a = None

  def __init__(self):
    # The original assigned a throw-away local named `a`; bind the attribute
    # on the instance so the initialisation actually has an effect.
    self.a = None
+
class StreamWriter(Writer):
  """Writer that sends each file through an external `tar` or `xbstream`
  command; the command's output goes to this process's stdout."""
  stream_cmd= ''

  def __init__(self, stream_option):
    """Select the streaming command.

    stream_option -- 'tar' or 'xbstream'; anything else raises Exception.
    """
    super(StreamWriter, self).__init__()
    if stream_option == 'tar':
      self.stream_cmd= 'tar chf -'
    elif stream_option == 'xbstream':
      self.stream_cmd= 'xbstream -c'
    else:
      raise Exception("Only tar or xbstream is supported as streaming option.")

  def write(self, file_name):
    """Stream one file; raises Exception when the command exits non-zero."""
    # Pass the file name as its own argv entry instead of splicing it into a
    # shell command line: names containing spaces, quotes or shell
    # metacharacters are then streamed verbatim and cannot inject commands.
    rc = subprocess.call(self.stream_cmd.split() + [file_name])
    if rc != 0:
      raise Exception("Got error on stream write: " + str(rc) + " " + file_name)
+
+
class MiscFilesProcessor():
  """Walks a MySQL data directory and hands selected files to process_file().

  process() visits every database sub-directory (files whose names match
  `wildcard`) and then the files directly under the data directory (minus
  `exclude_files`).  process_db() and process_file() are no-op hooks here.
  """
  datadir = None
  # File extensions of per-table / per-database files to pick up.  This must
  # be an alternation group anchored at the end of the name: the previous
  # character class `[frm|MYD|...]` matched any name containing a dot followed
  # by a single one of those letters (e.g. "notes.txt").
  wildcard = r'.*\.(frm|MYD|MYI|MAD|MAI|MRG|TRG|TRN|ARM|ARZ|CSM|CSV|opt|par)$'
  regex = None
  start_backup_time = None
  skip_check_frm_timestamp = None

  def __init__(self, datadir, skip_check_frm_timestamp, start_backup_time):
    """
    datadir                  -- directory to traverse
    skip_check_frm_timestamp -- when true, check_frm_timestamp() does nothing
    start_backup_time        -- timestamp compared against .frm mtimes
    """
    self.datadir = datadir
    self.regex = re.compile(self.wildcard)
    self.skip_check_frm_timestamp = skip_check_frm_timestamp
    self.start_backup_time = start_backup_time

  def process_db(self, db):
    # do nothing
    pass

  def process_file(self, path):
    # do nothing
    pass

  def check_frm_timestamp(self, fname, path):
    """Raise Exception if a .frm file was modified after start_backup_time."""
    if self.skip_check_frm_timestamp or not fname.endswith('.frm'):
      return
    if os.path.getmtime(path) > self.start_backup_time:
      logger.error('FRM file %s was updated after starting backups. '
                   'Schema could have changed and the resulting copy may '
                   'not be valid. Aborting. '
                   '(backup time: %s, file modifled time: %s)',
                   path, datetime.datetime.fromtimestamp(self.start_backup_time).strftime('%Y-%m-%d %H:%M:%S'),
                   datetime.datetime.fromtimestamp(os.path.getmtime(path)).strftime('%Y-%m-%d %H:%M:%S'))
      raise Exception("Inconsistent frm file timestamp")

  def process(self):
    """Traverse the data directory; paths passed on are relative to it."""
    # Relative paths below are resolved against the data directory.
    os.chdir(self.datadir)
    for db in self.get_databases():
      logger.info("Starting MySQL misc file traversal from database %s..", db)
      self.process_db(db)
      for f in self.get_files(db):
        if self.match(f):
          rel_path = os.path.join(db, f)
          self.check_frm_timestamp(f, rel_path)
          self.process_file(rel_path)
    logger.info("Traversing misc files from data directory..")
    for f in self.get_files(""):
      should_skip = False
      for e in exclude_files:
        if f.startswith(e) or f.endswith(e):
          logger.info("Skipping %s", f)
          should_skip = True
          break
      if not should_skip:
        self.process_file(f)

  def match(self, filename):
    """Return True when filename ends with one of the known extensions."""
    return self.regex.match(filename) is not None

  def get_databases(self):
    """List non-file entries of datadir that are neither hidden nor sockets."""
    dbs = []
    dirs = [ d for d in os.listdir(self.datadir) \
            if not os.path.isfile(os.path.join(self.datadir,d))]
    for db in dirs:
      if not db.startswith('.') and not self._is_socket(db):
        dbs.append(db)
    return dbs

  def get_files(self, db):
    """List regular files directly inside datadir/db."""
    dbdir = self.datadir + "/" + db
    return [ f for f in os.listdir(dbdir) \
            if os.path.isfile(os.path.join(dbdir,f))]

  def _is_socket(self, item):
    """Return True when datadir/item is a socket."""
    mode = os.stat(os.path.join(self.datadir, item)).st_mode
    if stat.S_ISSOCK(mode):
      return True
    return False
+
+
class MySQLBackup(MiscFilesProcessor):
  """MiscFilesProcessor that sends every selected file to a writer."""
  writer = None

  def __init__(self, datadir, writer, skip_check_frm_timestamp, start_backup_time):
    self.writer = writer
    MiscFilesProcessor.__init__(
        self,
        datadir=datadir,
        skip_check_frm_timestamp=skip_check_frm_timestamp,
        start_backup_time=start_backup_time)

  def process_file(self, fname):
    # Overrides the no-op hook of the base class.
    self.writer.write(fname)
+
+
class MiscFilesLinkCreator(MiscFilesProcessor):
  """MiscFilesProcessor that hard-links selected files into snapshot_dir."""
  snapshot_dir = None

  def __init__(self, datadir, snapshot_dir, skip_check_frm_timestamp, start_backup_time):
    self.snapshot_dir = snapshot_dir
    MiscFilesProcessor.__init__(
        self,
        datadir=datadir,
        skip_check_frm_timestamp=skip_check_frm_timestamp,
        start_backup_time=start_backup_time)

  def process_db(self, db):
    # Create the per-database sub-directory inside the snapshot directory.
    os.makedirs(os.path.join(self.snapshot_dir, db))

  def process_file(self, path):
    # `path` is used as given for the source and re-rooted under snapshot_dir
    # for the link target.
    os.link(path, os.path.join(self.snapshot_dir, path))
+
+
+# RocksDB backup
class RocksDBBackup():
  """One backup round over a checkpoint directory.

  A round sends the sst files that were not already sent (with the same size)
  by a previous round.  When every targeted sst file has been sent the WAL and
  manifest files follow and `finished` becomes True; otherwise the round stops
  once the time limit is exceeded and the caller starts another round.
  Every file is removed from the checkpoint directory after it is written.
  """
  source_dir = None
  writer = None
  # sst files sent in this backup round
  sent_sst = {}
  # target sst files in this backup round
  target_sst = {}
  # sst files sent in all backup rounds
  total_sent_sst= {}
  # sum of sst file size sent in this backup round
  sent_sst_size = 0
  # sum of target sst file size in this backup round
  # if sent_sst_size becomes equal to target_sst_size,
  # it means the backup round finished backing up all sst files
  target_sst_size = 0
  # sum of all sst file size sent all backup rounds
  total_sent_sst_size= 0
  # sum of all target sst file size from all backup rounds
  total_target_sst_size = 0
  show_progress_size_interval= 1073741824 # 1GB
  wal_files= []
  manifest_files= []
  finished= False

  def __init__(self, source_dir, writer, prev):
    """
    source_dir -- checkpoint directory; becomes the working directory
    writer     -- object whose write(file_name) sends one file
    prev       -- RocksDBBackup of the previous round, or None
    """
    self.source_dir = source_dir
    self.writer = writer
    # File names below are used relative to the checkpoint directory.
    os.chdir(self.source_dir)
    self.init_target_files(prev)

  def init_target_files(self, prev):
    """Classify the files in source_dir and reset the per-round counters."""
    sst = {}
    self.sent_sst = {}
    self.target_sst= {}
    self.total_sent_sst = {}
    self.sent_sst_size = 0
    self.target_sst_size = 0
    self.total_sent_sst_size= 0
    self.total_target_sst_size= 0
    self.wal_files= []
    self.manifest_files= []

    for f in os.listdir(self.source_dir):
      if f.endswith(rocksdb_data_suffix):
        # Stat once so the skip test, the recorded size and the running
        # total are all derived from the same value.
        size = int(os.stat(f).st_size)
        # exactly the same file (same size) was sent in previous backup rounds
        if prev is not None and f in prev.total_sent_sst and size == prev.total_sent_sst[f]:
          continue
        sst[f]= size
        self.target_sst_size = self.target_sst_size + size
      elif is_manifest(f):
        self.manifest_files.append(f)
      elif f.endswith(rocksdb_wal_suffix):
        self.wal_files.append(f)
    self.target_sst= collections.OrderedDict(sorted(sst.items()))

    if prev is not None:
      self.total_sent_sst = prev.total_sent_sst
      self.total_sent_sst_size = prev.total_sent_sst_size
      self.total_target_sst_size = self.target_sst_size + prev.total_sent_sst_size
    else:
      self.total_target_sst_size = self.target_sst_size

  def do_backup_single(self, fname):
    """Write one file, then delete it from the checkpoint directory."""
    self.writer.write(fname)
    os.remove(fname)

  def do_backup_sst(self, fname, size):
    """Send one sst file and update the per-round and overall counters."""
    self.do_backup_single(fname)
    self.sent_sst[fname]= size
    self.total_sent_sst[fname]= size
    self.sent_sst_size = self.sent_sst_size + size
    self.total_sent_sst_size = self.total_sent_sst_size + size

  def do_backup_manifest(self):
    for f in self.manifest_files:
      self.do_backup_single(f)

  def do_backup_wal(self):
    for f in self.wal_files:
      self.do_backup_single(f)

  # this is the last snapshot round. backing up all the rest files
  def do_backup_final(self):
    logger.info("Backup WAL..")
    self.do_backup_wal()
    logger.info("Backup Manifest..")
    self.do_backup_manifest()
    self.do_cleanup()
    self.finished= True

  def do_cleanup(self):
    """Remove the whole checkpoint directory."""
    shutil.rmtree(self.source_dir)
    logger.info("Cleaned up checkpoint from %s", self.source_dir)

  def do_backup_until(self, time_limit):
    """Send targeted sst files until done or until time_limit seconds passed.

    Returns self so the caller can inspect `finished` and hand this object to
    the next round as `prev`.
    """
    logger.info("Starting backup from snapshot: target files %d", len(self.target_sst))
    start_time= time.time()
    last_progress_time= start_time
    progress_size= 0
    for fname, size in self.target_sst.iteritems():
      self.do_backup_sst(fname, size)
      progress_size= progress_size + size
      elapsed_seconds = time.time() - start_time
      progress_seconds = time.time() - last_progress_time

      if self.should_show_progress(size):
        self.show_progress(progress_size, progress_seconds)
        progress_size=0
        last_progress_time= time.time()

      if elapsed_seconds > time_limit and self.has_sent_all_sst() is False:
        logger.info("Snapshot round finished. Elapsed Time: %5.2f. Remaining sst files: %d",
                    elapsed_seconds, len(self.target_sst) - len(self.sent_sst))
        self.do_cleanup()
        break
    if self.has_sent_all_sst():
      self.do_backup_final()

    return self

  def should_show_progress(self, size):
    """True when sending `size` more bytes crossed a progress-interval boundary."""
    if int(self.total_sent_sst_size/self.show_progress_size_interval) > int((self.total_sent_sst_size-size)/self.show_progress_size_interval):
      return True
    else:
      return False

  def show_progress(self, size, seconds):
    """Log overall progress and the speed for `size` bytes sent in `seconds`."""
    # Guard both divisions: a zero elapsed time (coarse clock) or a zero
    # target size must not abort the backup with ZeroDivisionError.
    if self.total_target_sst_size > 0:
      percent = self.total_sent_sst_size*100/self.total_target_sst_size
    else:
      percent = 0.0
    if seconds > 0:
      speed = size/seconds/1024/1024
    else:
      speed = 0.0
    logger.info("Backup Progress: %5.2f%% Sent %6.2f GB of %6.2f GB data, Transfer Speed: %6.2f MB/s",
                percent,
                self.total_sent_sst_size/1024/1024/1024,
                self.total_target_sst_size/1024/1024/1024,
                speed)

  def print_backup_report(self):
    logger.info("Sent %6.2f GB of sst files, %d files in total.",
                self.total_sent_sst_size/1024/1024/1024,
                len(self.total_sent_sst))

  def has_sent_all_sst(self):
    """True when the bytes sent this round equal the bytes targeted."""
    if self.sent_sst_size == self.target_sst_size:
      return True
    return False
+
+
class MySQLUtil:
  """Static helpers for the MySQL statements this script issues."""

  @staticmethod
  def connect(user, password, port, socket=None):
    """Open a MySQLdb connection.

    A unix socket is used when `socket` is given; otherwise the connection
    goes to 127.0.0.1 on `port`.
    """
    if socket:
      dbh = MySQLdb.Connect(user=user,
                            passwd=password,
                            unix_socket=socket)
    else:
      dbh = MySQLdb.Connect(user=user,
                            passwd=password,
                            port=port,
                            host="127.0.0.1")
    return dbh

  @staticmethod
  def create_checkpoint(dbh, checkpoint_dir):
    """Set rocksdb_create_checkpoint to checkpoint_dir on connection dbh."""
    cur= dbh.cursor()
    try:
      # Let the driver quote the path instead of formatting it into the SQL
      # text, so a quote or backslash in the directory name cannot break (or
      # alter) the statement.
      cur.execute("SET GLOBAL rocksdb_create_checkpoint=%s", (checkpoint_dir,))
    finally:
      cur.close()

  @staticmethod
  def get_datadir(dbh):
    """Return the server's @@datadir value."""
    cur = dbh.cursor()
    try:
      cur.execute("SELECT @@datadir")
      row = cur.fetchone()
    finally:
      # The original never closed this cursor.
      cur.close()
    return row[0]
+
+
class BackupRunner:
  """Drives the streaming backup: RocksDB rounds, then the misc MySQL files."""
  datadir = None
  start_backup_time = None

  def __init__(self, datadir):
    self.datadir = datadir
    # Recorded once; passed on to MySQLBackup by backup_mysql().
    self.start_backup_time = time.time()

  @staticmethod
  def _new_writer():
    # Only streaming output is implemented; anything else is an error.
    if not opts.output_stream:
      raise Exception("Currently only streaming backup is supported.")
    return StreamWriter(opts.output_stream)

  def start_backup_round(self, backup_round, prev_backup):
    """Create a checkpoint and run one RocksDBBackup round over it.

    Returns the RocksDBBackup object of this round.  Any exception is logged,
    the snapshot directory is cleaned up if the round object exists, and the
    process exits with status 1.
    """
    def signal_handler(*args):
      logger.info("Got signal. Exit")
      if rocks_backup is not None:
        logger.info("Cleaning up snapshot directory..")
        rocks_backup.do_cleanup()
      sys.exit(1)

    rocks_backup = None
    try:
      signal.signal(signal.SIGINT, signal_handler)
      stream_writer = self._new_writer()

      snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round)
      dbh = MySQLUtil.connect(opts.mysql_user,
                              opts.mysql_password,
                              opts.mysql_port,
                              opts.mysql_socket)
      if not self.datadir:
        self.datadir = MySQLUtil.get_datadir(dbh)
        logger.info("Set datadir: %s", self.datadir)
      logger.info("Creating checkpoint at %s", snapshot_dir)
      MySQLUtil.create_checkpoint(dbh, snapshot_dir)
      logger.info("Created checkpoint at %s", snapshot_dir)
      rocks_backup = RocksDBBackup(snapshot_dir, stream_writer, prev_backup)
      return rocks_backup.do_backup_until(opts.checkpoint_interval)
    except Exception as e:
      logger.error(e)
      logger.error(traceback.format_exc())
      if rocks_backup is not None:
        logger.info("Cleaning up snapshot directory.")
        rocks_backup.do_cleanup()
      sys.exit(1)

  def backup_mysql(self):
    """Stream the misc MySQL files; log and exit with status 1 on any error."""
    try:
      stream_writer = self._new_writer()
      misc_backup = MySQLBackup(self.datadir, stream_writer,
                                opts.skip_check_frm_timestamp,
                                self.start_backup_time)
      logger.info("Taking MySQL misc backups..")
      misc_backup.process()
      logger.info("MySQL misc backups done.")
    except Exception as e:
      logger.error(e)
      logger.error(traceback.format_exc())
      sys.exit(1)
+
+
class WDTBackup:
  """Backup rounds that ship a checkpoint directory with the WDT tool."""
  datadir = None
  start_backup_time = None

  def __init__(self, datadir):
    self.datadir = datadir
    self.start_backup_time = time.time()

  def cleanup(self, snapshot_dir, server_log):
    """Dump and close the receiver log, then remove the snapshot directory.

    Safe to call more than once and before the snapshot directory exists:
    this runs from the error and signal paths, where raising here would hide
    the original failure.
    """
    if server_log and not server_log.closed:
      server_log.seek(0)
      logger.info("WDT server log:")
      logger.info(server_log.read())
      server_log.close()
    # The path is computed before the checkpoint is created, so it may not
    # exist yet (e.g. when connecting or creating the checkpoint failed).
    if snapshot_dir and os.path.isdir(snapshot_dir):
      logger.info("Cleaning up snapshot dir %s", snapshot_dir)
      shutil.rmtree(snapshot_dir)

  def backup_with_timeout(self, backup_round):
    """Run one WDT round.

    Returns True when both the sender and the receiver exited with status 0.
    Any exception is logged, cleanup() is run and the process exits with
    status 1.
    """
    def signal_handler(*args):
      logger.info("Got signal. Exit")
      self.cleanup(snapshot_dir, server_log)
      sys.exit(1)

    logger.info("Starting backup round %d", backup_round)
    snapshot_dir = None
    server_log = None
    try:
      signal.signal(signal.SIGINT, signal_handler)
      # create rocksdb snapshot
      snapshot_dir = os.path.join(opts.checkpoint_directory, str(backup_round))
      dbh = MySQLUtil.connect(opts.mysql_user,
                              opts.mysql_password,
                              opts.mysql_port,
                              opts.mysql_socket)
      logger.info("Creating checkpoint at %s", snapshot_dir)
      MySQLUtil.create_checkpoint(dbh, snapshot_dir)
      logger.info("Created checkpoint at %s", snapshot_dir)

      # get datadir if not provided
      if not self.datadir:
        self.datadir = MySQLUtil.get_datadir(dbh)
        logger.info("Set datadir: %s", self.datadir)

      # create links for misc files
      link_creator = MiscFilesLinkCreator(self.datadir, snapshot_dir,
                                          opts.skip_check_frm_timestamp,
                                          self.start_backup_time)
      link_creator.process()

      current_path = os.path.join(opts.backupdir, "CURRENT")

      # construct receiver cmd, using the data directory as recovery-id.
      # we delete the current file because it is not append-only, therefore not
      # resumable.
      remote_cmd = (
                "ssh {0} rm -f {1}; "
                "{2} -directory {3} -enable_download_resumption "
                "-recovery_id {4} -start_port 0 -abort_after_seconds {5} {6}"
          ).format(opts.destination,
                   current_path,
                   wdt_bin,
                   opts.backupdir,
                   self.datadir,
                   opts.checkpoint_interval,
                   opts.extra_wdt_receiver_options)
      logger.info("WDT remote cmd %s", remote_cmd)
      server_log = tempfile.TemporaryFile()
      remote_process = subprocess.Popen(remote_cmd.split(),
                                        stdout=subprocess.PIPE,
                                        stderr=server_log)
      # The first stdout line of the receiver is used as the connection url.
      wdt_url = remote_process.stdout.readline().strip()
      if not wdt_url:
        raise Exception("Unable to get connection url from wdt receiver")
      sender_cmd = (
                "{0} -connection_url \'{1}\' -directory {2} -app_name=myrocks "
                "-avg_mbytes_per_sec {3} "
                "-enable_download_resumption -abort_after_seconds {4} {5}"
          ).format(wdt_bin,
                   wdt_url,
                   snapshot_dir,
                   opts.avg_mbytes_per_sec,
                   opts.checkpoint_interval,
                   opts.extra_wdt_sender_options)
      # os.system returns a wait status; the shift extracts the exit code.
      sender_status = os.system(sender_cmd) >> 8
      remote_status = remote_process.wait()
      self.cleanup(snapshot_dir, server_log)
      # TODO: handle retryable and non-retyable errors differently
      return (sender_status == 0 and remote_status == 0)

    except Exception as e:
      logger.error(e)
      logger.error(traceback.format_exc())
      self.cleanup(snapshot_dir, server_log)
      sys.exit(1)
+
+
def backup_using_wdt():
  """Run WDT backup rounds until one of them reports completion.

  Exits with status 1 when a required WDT option is missing.
  """
  if not opts.destination:
    logger.error("Must provide remote destination when using WDT")
    sys.exit(1)
  # The remote directory is joined into the receiver command in every round;
  # reject a missing value up front instead of failing with a confusing error
  # after a checkpoint has already been created.
  if not opts.backupdir:
    logger.error("Must provide remote backup directory when using WDT")
    sys.exit(1)

  # TODO: detect whether WDT is installed
  logger.info("Backing up myrocks to %s using WDT", opts.destination)
  wdt_backup = WDTBackup(opts.datadir)
  finished = False
  backup_round = 1
  while not finished:
    start_time = time.time()
    finished = wdt_backup.backup_with_timeout(backup_round)
    end_time = time.time()
    duration_seconds = end_time - start_time
    if (not finished) and (duration_seconds < opts.checkpoint_interval):
      # round finished before timeout
      sleep_duration = (opts.checkpoint_interval - duration_seconds)
      logger.info("Sleeping for %f seconds", sleep_duration)
      time.sleep(sleep_duration)

    backup_round = backup_round + 1
  logger.info("Finished myrocks backup using WDT")
+
+
def init_logger():
  """Install the module logger: INFO level, timestamped lines on stderr."""
  global logger
  line_format = logging.Formatter(
      "%(asctime)s.%(msecs)03d %(levelname)s %(message)s",
      "%Y-%m-%d %H:%M:%S")
  stderr_handler = logging.StreamHandler(sys.stderr)
  stderr_handler.setFormatter(line_format)

  log = logging.getLogger('myrocks_hotbackup')
  log.setLevel(logging.INFO)
  log.addHandler(stderr_handler)
  logger = log
+
# Usage texts; parse_options() joins them into the parser's usage message and
# print_move_back_usage() logs the move-back one.
backup_wdt_usage = (
  "Backup using WDT: myrocks_hotbackup --user=root --password=pw "
  "--stream=wdt "
  "--checkpoint_dir=<directory where temporary backup hard links are created> "
  "--destination=<remote host name> "
  "--backup_dir=<remote directory name>. "
  "This has to be executed at the src host.")
backup_usage = (
  "Backup: set -o pipefail; myrocks_hotbackup --user=root --password=pw "
  "--port=3306 "
  "--checkpoint_dir=<directory where temporary backup hard links are created> "
  "| ssh -o NoneEnabled=yes remote_server "
  "'tar -xi -C <directory on remote server where backups will be sent>' . "
  "You need to execute backup command on a server where you take backups.")
move_back_usage = (
  "Move-Back: myrocks_hotbackup --move_back "
  "--datadir=<dest mysql datadir> "
  "--rocksdb_datadir=<dest rocksdb datadir> "
  "--rocksdb_waldir=<dest rocksdb wal dir> "
  "--backup_dir=<where backup files are stored> . "
  "You need to execute move-back command on a server where backup files are sent.")
+
+
def parse_options():
  """Parse the command line and store the result in the global `opts`."""
  global opts

  usage_text = "\n\n" + "\n\n".join(
      (backup_usage, backup_wdt_usage, move_back_usage))
  parser = OptionParser(usage = usage_text)

  # (flags, keyword arguments) for every option, in registration order.
  option_table = [
    (('-i', '--interval'),
     dict(type='int', dest='checkpoint_interval', default=300,
          help='Number of seconds to renew checkpoint')),
    (('-c', '--checkpoint_dir'),
     dict(type='string', dest='checkpoint_directory',
          default='/data/mysql/backup/snapshot',
          help='Local directory name where checkpoints will be created.')),
    (('-d', '--datadir'),
     dict(type='string', dest='datadir', default=None,
          help='backup mode: src MySQL datadir. move_back mode: dest MySQL datadir')),
    (('-s', '--stream'),
     dict(type='string', dest='output_stream', default='tar',
          help='Setting streaming backup options. Currently tar, WDT '
               'and xbstream are supported. Default is tar')),
    (('--destination',),
     dict(type='string', dest='destination', default='',
          help='Remote server name. Only used for WDT mode so far.')),
    (('--avg_mbytes_per_sec',),
     dict(type='int', dest='avg_mbytes_per_sec', default=500,
          help='Average backup rate in MBytes/sec. WDT only.')),
    (('--extra_wdt_sender_options',),
     dict(type='string', dest='extra_wdt_sender_options', default='',
          help='Extra options for WDT sender')),
    (('--extra_wdt_receiver_options',),
     dict(type='string', dest='extra_wdt_receiver_options', default='',
          help='Extra options for WDT receiver')),
    (('-u', '--user'),
     dict(type='string', dest='mysql_user', default='root',
          help='MySQL user name')),
    (('-p', '--password'),
     dict(type='string', dest='mysql_password', default='',
          help='MySQL password name')),
    (('-P', '--port'),
     dict(type='int', dest='mysql_port', default=3306,
          help='MySQL port number')),
    (('-S', '--socket'),
     dict(type='string', dest='mysql_socket', default=None,
          help='MySQL socket path. Takes precedence over --port.')),
    (('-m', '--move_back'),
     dict(action='store_true', dest='move_back', default=False,
          help='Moving MyRocks backup files to proper locations.')),
    (('-r', '--rocksdb_datadir'),
     dict(type='string', dest='rocksdb_datadir', default=None,
          help='RocksDB target data directory where backup data files will be moved. Must be empty.')),
    (('-w', '--rocksdb_waldir'),
     dict(type='string', dest='rocksdb_waldir', default=None,
          help='RocksDB target data directory where backup wal files will be moved. Must be empty.')),
    (('-b', '--backup_dir'),
     dict(type='string', dest='backupdir', default=None,
          help='backup mode for WDT: Remote directory to store '
               'backup. move_back mode: Locations where backup '
               'files are stored.')),
    (('-f', '--skip_check_frm_timestamp'),
     dict(dest='skip_check_frm_timestamp', action='store_true', default=False,
          help='skipping to check if frm files are updated after starting backup.')),
    (('-D', '--debug_signal_file'),
     dict(type='string', dest='debug_signal_file', default=None,
          help='debugging purpose: waiting until the specified file is created')),
  ]
  for flags, settings in option_table:
    parser.add_option(*flags, **settings)

  # Positional arguments are accepted by the parser but not used.
  opts, _positional = parser.parse_args()
+
+
def create_moveback_dir(directory):
  """Make sure `directory` exists and is empty.

  The directory is created when missing.  When it already exists and holds
  any entry, the first entry is logged and an Exception is raised.
  """
  if not os.path.exists(directory):
    os.makedirs(directory)
    return
  entries = os.listdir(directory)
  if entries:
    logger.error("Directory %s has file or directory %s!", directory, entries[0])
    # The original used a bare `raise` with no active exception, which fails
    # with an unrelated error; raise one that states the actual problem.
    raise Exception("Directory %s is not empty" % directory)
+
def print_move_back_usage():
  """Log the move-back usage text at WARNING level."""
  usage_text = move_back_usage
  logger.warning(usage_text)
+
def move_back():
  """Move backup files from opts.backupdir to their final locations.

  WAL files go to opts.rocksdb_waldir, sst and manifest-type files to
  opts.rocksdb_datadir, every other file and every directory (except those
  ending in '.rocksdb', which are left in place) to opts.datadir.  All three
  target directories must be missing or empty.
  """
  required = (opts.rocksdb_datadir, opts.rocksdb_waldir,
              opts.backupdir, opts.datadir)
  if any(value is None for value in required):
    print_move_back_usage()
    # Missing required options is a failure; a bare sys.exit() would report
    # success (status 0) to the calling shell or script.
    sys.exit(1)
  create_moveback_dir(opts.datadir)
  create_moveback_dir(opts.rocksdb_datadir)
  create_moveback_dir(opts.rocksdb_waldir)

  # Entry names are moved relative to the backup directory.
  os.chdir(opts.backupdir)
  for f in os.listdir(opts.backupdir):
    if os.path.isfile(os.path.join(opts.backupdir,f)):
      if f.endswith(rocksdb_wal_suffix):
        shutil.move(f, opts.rocksdb_waldir)
      elif f.endswith(rocksdb_data_suffix) or is_manifest(f):
        shutil.move(f, opts.rocksdb_datadir)
      else:
        shutil.move(f, opts.datadir)
    else: #directory
      if f.endswith('.rocksdb'):
        continue
      shutil.move(f, opts.datadir)
+
def start_backup():
  """Run RocksDB backup rounds until one finishes, then back up misc files."""
  logger.info("Starting backup.")
  runner = BackupRunner(opts.datadir)

  # Each round receives the previous round's object (None for the first one).
  round_no = 0
  last_round = None
  done = False
  while not done:
    round_no = round_no + 1
    last_round = runner.start_backup_round(round_no, last_round)
    done = last_round.finished is True
  last_round.print_backup_report()
  logger.info("RocksDB Backup Done.")

  # Debugging aid: block until the signal file shows up, polling every second.
  signal_file = opts.debug_signal_file
  if signal_file:
    while not os.path.exists(signal_file):
      logger.info("Waiting until %s is created..", signal_file)
      time.sleep(1)

  runner.backup_mysql()
  logger.info("All Backups Done.")
+
+
def main():
  """Parse options, set up logging and dispatch to the selected mode."""
  parse_options()
  init_logger()

  # Move-back wins over any streaming mode; 'wdt' selects the WDT transport;
  # everything else goes through the streaming backup.
  if opts.move_back is True:
    move_back()
    return
  if opts.output_stream == 'wdt':
    backup_using_wdt()
    return
  start_backup()


if __name__ == "__main__":
  main()
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc
new file mode 100644
index 00000000000..ba2e7ace0c5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc
@@ -0,0 +1,150 @@
+--echo #
+--echo # Testing concurrent transactions.
+--echo #
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+connect (con3,localhost,root,,);
+
+connection con1;
+begin;
+insert into t values (); # 1
+
+connection con2;
+begin;
+insert into t values (); # 2
+
+connection con3;
+begin;
+insert into t values (); # 3
+
+connection con1;
+insert into t values (); # 4
+
+connection con2;
+insert into t values (); # 5
+
+connection con3;
+insert into t values (); # 6
+
+connection con2;
+commit;
+
+connection con3;
+rollback;
+
+connection con1;
+commit;
+
+delete from t;
+
+--echo # Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Slave value before restart
+sync_slave_with_master;
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+connection slave;
+--source include/stop_slave.inc
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+
+connection default;
+--echo # Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--let $rpl_server_number = 2
+--source include/rpl_restart_server.inc
+
+connection slave;
+--source include/start_slave.inc
+--echo # Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+--source include/wait_until_count_sessions.inc
+
+--echo #
+--echo # Testing interaction of merge markers with various DDL statements.
+--echo #
+connection slave;
+--source include/stop_slave.inc
+
+connection default;
+
+--echo # Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Remove auto_increment property.
+alter table t modify i int;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Add column j.
+alter table t add column j int;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Rename tables.
+rename table t to t2;
+rename table t2 to t;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Change auto_increment property
+alter table t auto_increment = 1000;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t drop primary key, add key (i), auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t add key (j), auto_increment = 1;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+
+--echo # Drop table.
+drop table t;
+
+--let $rpl_server_number = 1
+--source include/rpl_restart_server.inc
+
+connection slave;
+--source include/start_slave.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
index 87cb1f70f32..6472b969ce6 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
@@ -1,6 +1,4 @@
---disable_warnings
-DROP TABLE IF EXISTS t1, t2, t3;
---enable_warnings
+--source include/count_sessions.inc
if ($data_order_desc)
{
@@ -20,7 +18,7 @@ eval CREATE TABLE t1(
b CHAR(30),
PRIMARY KEY(pk) COMMENT "$pk_cf",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
# Create a second identical table to validate that bulk loading different
# tables in the same session works
@@ -30,7 +28,7 @@ eval CREATE TABLE t2(
b CHAR(30),
PRIMARY KEY(pk) COMMENT "$pk_cf",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
# Create a third table using partitions to validate that bulk loading works
# across a partitioned table
@@ -40,7 +38,7 @@ eval CREATE TABLE t3(
b CHAR(30),
PRIMARY KEY(pk) COMMENT "$pk_cf",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
@@ -154,3 +152,5 @@ EOF
# Cleanup
disconnect other;
DROP TABLE t1, t2, t3;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
new file mode 100644
index 00000000000..84a9d8c578e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
@@ -0,0 +1,143 @@
+--source include/count_sessions.inc
+
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+### Test individual INSERTs ###
+
+# A table with only a PK won't have rows until the bulk load is finished
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1;
+
+# A table with a PK and a SK: reads forced through the PK still return no rows until the bulk load is finished
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b))
+ ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1;
+
+# Inserting into another table finishes bulk load to the previous table
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+DROP TABLE t1, t2;
+
+### Test bulk load from a file ###
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB;
+eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+# Create a text file with data to import into the table.
+# Neither column is written in sorted order, so the PK values are unsorted for every table
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+my $max = 5000000;
+my $sign = 1;
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $a = 1 + $sign * $ii;
+ my $b = 1 - $sign * $ii;
+ $sign = -$sign;
+ print $fh "$a\t$b\n";
+}
+close($fh);
+EOF
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(a) from t1;
+select count(b) from t1;
+select count(a) from t2;
+select count(b) from t2;
+select count(a) from t3;
+select count(b) from t3;
+
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+
+disconnect other;
+DROP TABLE t1, t2, t3;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
index 0c3ad720194..18365338d0c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
@@ -15,6 +15,10 @@ count(b)
300000
ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+ERROR HY000: Status error 10 received from RocksDB: Operation aborted: Failed to acquire lock due to max_num_locks limit
+set session rocksdb_bulk_load=1;
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=0;
SELECT COUNT(*) as c FROM
(SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE INDEX(`kb`)
UNION DISTINCT
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
index f8508febb01..5d947603ec5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result
@@ -778,3 +778,20 @@ set global rocksdb_force_flush_memtable_now = true;
select * from t1;
col1 col2 extra
DROP TABLE t1;
+create table t1 (i int auto_increment, key(i)) engine=rocksdb;
+insert into t1 values();
+insert into t1 values();
+insert into t1 values();
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ KEY `i` (`i`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) NOT NULL AUTO_INCREMENT,
+ KEY `i` (`i`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result
new file mode 100644
index 00000000000..1a2abbf3285
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result
@@ -0,0 +1,35 @@
+#
+# Test how MyRocks behaves when RocksDB reports corrupted data.
+#
+#
+# Test server crashes on corrupted data and restarts
+#
+create table t1 (
+pk int not null primary key,
+col1 varchar(10)
+) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+select * from t1 where pk=1;
+pk col1
+1 1
+set session debug= "+d,rocksdb_return_status_corrupted";
+select * from t1 where pk=1;
+ERROR HY000: Lost connection to MySQL server during query
+#
+# The same for scan queries
+#
+select * from t1;
+pk col1
+1 1
+2 2
+3 3
+set session debug= "+d,rocksdb_return_status_corrupted";
+select * from t1;
+ERROR HY000: Lost connection to MySQL server during query
+#
+# Test restart failure. The server is shutdown at this point.
+#
+#
+# Remove corruption file and restart cleanly
+#
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result
new file mode 100644
index 00000000000..ae29b4e415d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result
@@ -0,0 +1,113 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+create table t (i int primary key auto_increment) engine=rocksdb;
+#
+# Testing concurrent transactions.
+#
+begin;
+insert into t values ();
+begin;
+insert into t values ();
+begin;
+insert into t values ();
+insert into t values ();
+insert into t values ();
+insert into t values ();
+commit;
+rollback;
+commit;
+delete from t;
+# Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 7
+# Slave value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/stop_slave.inc
+include/rpl_restart_server.inc [server_number=1]
+# Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/rpl_restart_server.inc [server_number=2]
+include/start_slave.inc
+# Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+#
+# Testing interaction of merge markers with various DDL statements.
+#
+include/stop_slave.inc
+# Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+# Remove auto_increment property.
+alter table t modify i int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t NULL
+# Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Add column j.
+alter table t add column j int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Rename tables.
+rename table t to t2;
+rename table t2 to t;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Change auto_increment property
+alter table t auto_increment = 1000;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1000
+alter table t auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t drop primary key, add key (i), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t add key (j), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 16
+# Drop table.
+drop table t;
+include/rpl_restart_server.inc [server_number=1]
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result
new file mode 100644
index 00000000000..949928f5a9f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result
@@ -0,0 +1,113 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3;
+#
+# Testing concurrent transactions.
+#
+begin;
+insert into t values ();
+begin;
+insert into t values ();
+begin;
+insert into t values ();
+insert into t values ();
+insert into t values ();
+insert into t values ();
+commit;
+rollback;
+commit;
+delete from t;
+# Master value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 7
+# Slave value before restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/stop_slave.inc
+include/rpl_restart_server.inc [server_number=1]
+# Master value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+include/rpl_restart_server.inc [server_number=2]
+include/start_slave.inc
+# Slave value after restart
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+#
+# Testing interaction of merge markers with various DDL statements.
+#
+include/stop_slave.inc
+# Drop and add primary key.
+alter table t modify i int;
+alter table t drop primary key;
+alter table t add primary key (i);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1
+# Remove auto_increment property.
+alter table t modify i int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t NULL
+# Add auto_increment property.
+insert into t values (123);
+alter table t modify i int auto_increment;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Add column j.
+alter table t add column j int;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Rename tables.
+rename table t to t2;
+rename table t2 to t;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+# Change auto_increment property
+alter table t auto_increment = 1000;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 1000
+alter table t auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t drop primary key, add key (i), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t add key (j), auto_increment = 1;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 124
+alter table t modify i int;
+alter table t add column (k int auto_increment), add key(k), auto_increment=15;
+include/rpl_restart_server.inc [server_number=1]
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 16
+# Drop table.
+drop table t;
+include/rpl_restart_server.inc [server_number=1]
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result
new file mode 100644
index 00000000000..79030e35225
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result
@@ -0,0 +1,107 @@
+#
+# Testing upgrading from server without merges for auto_increment
+# to new server with such support.
+#
+set debug='+d,myrocks_autoinc_upgrade';
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+2
+3
+delete from t where i > 1;
+select * from t;
+i
+1
+select table_name, index_name, auto_increment
+from information_schema.rocksdb_ddl where table_name = 't';
+table_name index_name auto_increment
+t PRIMARY NULL
+set debug='-d,myrocks_autoinc_upgrade';
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+2
+3
+4
+select table_name, index_name, auto_increment
+from information_schema.rocksdb_ddl where table_name = 't';
+table_name index_name auto_increment
+t PRIMARY 5
+delete from t where i > 1;
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+i
+1
+5
+6
+7
+drop table t;
+#
+# Testing crash safety of transactions.
+#
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+# Before anything
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_before";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 4
+select max(i) from t;
+max(i)
+3
+# After engine prepare
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_after_prepare";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 4
+select max(i) from t;
+max(i)
+3
+# After binlog
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_after_log";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 6
+select max(i) from t;
+max(i)
+5
+# After everything
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_after";
+commit;
+ERROR HY000: Lost connection to MySQL server during query
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+table_schema table_name auto_increment
+test t 8
+select max(i) from t;
+max(i)
+7
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
index 0fb3d96c58f..5da9a7e7e1c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
@@ -61,3 +61,82 @@ LAST_INSERT_ID()
SELECT a FROM t1 ORDER BY a;
a
DROP TABLE t1;
+#---------------------------
+# test large autoincrement values
+#---------------------------
+SET auto_increment_increment = 1;
+SET auto_increment_offset = 1;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+18446744073709551614 b
+DROP TABLE t1;
+SET auto_increment_increment = 300;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+DROP TABLE t1;
+SET auto_increment_offset = 200;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'b');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (NULL, 'c');
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+a b
+18446744073709551613 a
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result b/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result
deleted file mode 100644
index 28b5b6cd070..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result
+++ /dev/null
@@ -1 +0,0 @@
-# The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE.
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
index b931a61e233..4a746d64c87 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in ascending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result
new file mode 100644
index 00000000000..1b1cf524011
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result
@@ -0,0 +1,8 @@
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1);
+DROP TABLE t1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+ERROR 42S02: Table 'test.t1' doesn't exist
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
index f230b173892..86a88c30d89 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
@@ -1,4 +1,4 @@
-CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
INSERT INTO t1 VALUES(10);
INSERT INTO t1 VALUES(11);
@@ -26,6 +26,15 @@ select @@rocksdb_bulk_load;
@@rocksdb_bulk_load
0
call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+SELECT * FROM t1;
+pk
+10
+11
TRUNCATE TABLE t1;
SET rocksdb_bulk_load_allow_unsorted=1;
SET rocksdb_bulk_load=1;
@@ -53,3 +62,34 @@ pk
202
SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
+CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (),(),();
+ERROR HY000: Rows must be inserted in primary key order during bulk load operation
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+SET @orig_table_open_cache=@@global.table_open_cache;
+CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(13, 0);
+INSERT INTO t1 VALUES(2, 'test 2');
+Warnings:
+Warning 1366 Incorrect integer value: 'test 2' for column 'b' at row 1
+INSERT INTO t1 VALUES(@id, @arg04);
+SET @@global.table_open_cache=FALSE;
+Warnings:
+Warning 1292 Truncated incorrect table_open_cache value: '0'
+INSERT INTO t1 VALUES(51479+0.333333333,1);
+DROP TABLE t1;
+SET @@global.table_open_cache=@orig_table_open_cache;
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1), (2);
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (3);
+ERROR HY000: Rows inserted during bulk load must not overlap existing rows
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
index 947f67434a5..4fd7ae9d9a5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in ascending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
index 6c38e030afb..7d7c9f34200 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in descending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "rev:cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
index e566691af28..c1b6d48a6a5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
@@ -1,4 +1,3 @@
-DROP TABLE IF EXISTS t1, t2, t3;
Data will be ordered in descending order
CREATE TABLE t1(
pk CHAR(5),
@@ -6,21 +5,21 @@ a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t2(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin';
+) ENGINE=ROCKSDB COLLATE 'latin1_bin';
CREATE TABLE t3(
pk CHAR(5),
a CHAR(30),
b CHAR(30),
PRIMARY KEY(pk) COMMENT "cf1",
KEY(a)
-) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
index 2a7c7bd69fd..9a7c2560819 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
@@ -1,12 +1,12 @@
-DROP TABLE IF EXISTS t1;
SET rocksdb_bulk_load_size=3;
SET rocksdb_bulk_load_allow_unsorted=1;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
-3 5
-1 3
@@ -14,42 +14,49 @@ a b
4 -2
6 -4
DROP TABLE t1;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b));
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b))
+ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
-6 -4
-4 -2
-2 0
--1 3
--3 5
SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+-3 5
+-1 3
+2 0
+4 -2
+6 -4
DROP TABLE t1;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
-CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
SET rocksdb_bulk_load=1;
INSERT INTO t1 VALUES (1,1);
INSERT INTO t2 VALUES (1,1);
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
1 1
INSERT INTO t1 VALUES (2,2);
-SELECT * FROM t2;
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
a b
1 1
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
1 1
SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
a b
1 1
2 2
DROP TABLE t1, t2;
-CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
-CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1");
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1")
+ENGINE=ROCKSDB;
CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
-PARTITION BY KEY() PARTITIONS 4;
+ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
@@ -99,5 +106,14 @@ count(a)
select count(b) from t3;
count(b)
5000000
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+-4999998 5000000
+-4999996 4999998
+-4999994 4999996
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+4999999 -4999997
+4999997 -4999995
+4999995 -4999993
DROP TABLE t1, t2, t3;
-SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
new file mode 100644
index 00000000000..fcd05fd60b4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
@@ -0,0 +1,117 @@
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1", KEY(b))
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+2 2
+1 1
+DROP TABLE t1, t2;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "rev:cf1")
+ENGINE=ROCKSDB;
+CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
+ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+4999999 -4999997
+4999997 -4999995
+4999995 -4999993
+SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3;
+a b
+-4999998 5000000
+-4999996 4999998
+-4999994 4999996
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
index 3bd87e9ffd6..c59b9804ef3 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result
@@ -1,4 +1,38 @@
-DROP TABLE IF EXISTS t1,t10,t11;
+CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb;
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4),
+(5, 4),(6, 4),(7, 4),(8, 4),(9, 4);
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="id";
+cardinality
+NULL
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="a";
+cardinality
+NULL
+ANALYZE TABLE t0;
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+FLOOR(@N/cardinality)
+1
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+FLOOR(@N/cardinality)
+2
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+ANALYZE TABLE t0;
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+FLOOR(@N/cardinality)
+1
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+FLOOR(@N/cardinality)
+2
+drop table t0;
create table t1(
id bigint not null primary key,
i1 bigint, #unique
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result
new file mode 100644
index 00000000000..24c1b730325
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result
@@ -0,0 +1,6 @@
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+variable_name variable_value
+ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+variable_name variable_value
+ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
index d7cb89becb7..1e7509172cb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
@@ -66,13 +66,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -122,13 +115,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -147,13 +133,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -204,13 +183,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -229,13 +201,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -254,13 +219,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -295,13 +253,6 @@ KEY
LOCK TYPE: EXCLUSIVE
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
-----------------------------------------
@@ -324,8 +275,12 @@ i
3
select * from t where i=2 for update;
select * from t where i=3 for update;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
select * from t where i=1 for update;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+deadlocks
+true
rollback;
i
3
@@ -410,13 +365,6 @@ KEY
LOCK TYPE: SHARED
INDEX NAME: PRIMARY
TABLE NAME: test.t
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: PRIMARY
-TABLE NAME: test.t
--------TXN_ID GOT DEADLOCK---------
@@ -455,13 +403,6 @@ KEY
LOCK TYPE: SHARED
INDEX NAME: NOT FOUND; IDX_ID
TABLE NAME: NOT FOUND; IDX_ID
----------------WAITING FOR---------------
-TXN_ID
-COLUMN FAMILY NAME: default
-KEY
-LOCK TYPE: EXCLUSIVE
-INDEX NAME: NOT FOUND; IDX_ID
-TABLE NAME: NOT FOUND; IDX_ID
--------TXN_ID GOT DEADLOCK---------
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
index a39f2d8c0d6..526d6247e60 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
@@ -1,17 +1,22 @@
DROP TABLE IF EXISTS is_ddl_t1;
DROP TABLE IF EXISTS is_ddl_t2;
+DROP TABLE IF EXISTS is_ddl_t3;
CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT,
PRIMARY KEY (i), KEY (j), KEY (k, l) COMMENT 'kl_cf')
ENGINE = ROCKSDB;
CREATE TABLE is_ddl_t2 (x INT, y INT, z INT,
PRIMARY KEY (z, y) COMMENT 'zy_cf',
KEY (x)) ENGINE = ROCKSDB;
-SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
-TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF
-test is_ddl_t1 NULL PRIMARY 1 13 default
-test is_ddl_t1 NULL j 2 13 default
-test is_ddl_t1 NULL k 2 13 kl_cf
-test is_ddl_t2 NULL PRIMARY 1 13 zy_cf
-test is_ddl_t2 NULL x 2 13 default
+CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB
+COMMENT "ttl_duration=3600;";
+SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
+TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF TTL_DURATION INDEX_FLAGS
+test is_ddl_t3 NULL PRIMARY 1 13 default 3600 1
+test is_ddl_t2 NULL PRIMARY 1 13 zy_cf 0 0
+test is_ddl_t2 NULL x 2 13 default 0 0
+test is_ddl_t1 NULL PRIMARY 1 13 default 0 0
+test is_ddl_t1 NULL j 2 13 default 0 0
+test is_ddl_t1 NULL k 2 13 kl_cf 0 0
DROP TABLE is_ddl_t1;
DROP TABLE is_ddl_t2;
+DROP TABLE is_ddl_t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result
new file mode 100644
index 00000000000..1c67387bcdc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result
@@ -0,0 +1,172 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+show create table information_schema.rocksdb_deadlock;
+Table Create Table
+ROCKSDB_DEADLOCK CREATE TEMPORARY TABLE `ROCKSDB_DEADLOCK` (
+ `DEADLOCK_ID` bigint(8) NOT NULL DEFAULT '0',
+ `TRANSACTION_ID` bigint(8) NOT NULL DEFAULT '0',
+ `CF_NAME` varchar(193) NOT NULL DEFAULT '',
+ `WAITING_KEY` varchar(513) NOT NULL DEFAULT '',
+ `LOCK_TYPE` varchar(193) NOT NULL DEFAULT '',
+ `INDEX_NAME` varchar(193) NOT NULL DEFAULT '',
+ `TABLE_NAME` varchar(193) NOT NULL DEFAULT '',
+ `ROLLED_BACK` bigint(8) NOT NULL DEFAULT '0'
+) ENGINE=MEMORY DEFAULT CHARSET=utf8
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+Deadlock #1
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+Deadlock #2
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+set global rocksdb_max_latest_deadlocks = 1;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 for update;
+i
+3
+select * from t where i=2 for update;
+select * from t where i=3 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+3
+rollback;
+i
+2
+rollback;
+set global rocksdb_max_latest_deadlocks = 5;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+Deadlock #5
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=100 for update;
+i
+select * from t where i=101 for update;
+i
+select * from t where i=2 for update;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=200 for update;
+i
+select * from t where i=201 for update;
+i
+select * from t where i=1 lock in share mode;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+rollback;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED PRIMARY test.t 1
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE INDEX_NAME TABLE_NAME 0
+DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED INDEX_NAME TABLE_NAME 1
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+select * from information_schema.rocksdb_deadlock;
+DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
index f63a271cdce..7fb9055083b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
@@ -25,10 +25,10 @@ UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
DROP TABLE t0, t1;
create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb;
insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300);
+set global rocksdb_force_flush_memtable_now=1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
-set global rocksdb_force_flush_memtable_now=1;
explain select * from t1 where key1 = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref key1 key1 5 const #
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
index 62875e378a4..797f339d8b1 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
@@ -6,6 +6,19 @@ t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
@@ -19,3 +32,37 @@ SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
DROP TABLE t1;
+CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+INSERT INTO t1 VALUES (5);
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES (1000);
+Warnings:
+Warning 1264 Out of range value for column 'pk' at row 1
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 2 15 30 0 0 0 127 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+127
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
index d0bfb05fd1b..96efca6e2b7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
@@ -8,6 +8,7 @@ ROW_LOCK_WAIT_TIMEOUTS
begin;
set @@rocksdb_lock_wait_timeout=1;
begin;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
insert into t values(0);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
@@ -16,6 +17,10 @@ ROW_LOCK_WAIT_TIMEOUTS
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
ROW_LOCK_WAIT_TIMEOUTS
1
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+waits
+true
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
insert into t values(0);
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
@@ -24,4 +29,7 @@ ROW_LOCK_WAIT_TIMEOUTS
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
ROW_LOCK_WAIT_TIMEOUTS
2
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
+waits
+true
drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result
new file mode 100644
index 00000000000..5a1eeb9fa3f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result
@@ -0,0 +1,20 @@
+CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*");
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files
+1
+SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files;
+@@global.open_files_limit - 1 = @@global.rocksdb_max_open_files
+1
+SELECT @@global.rocksdb_max_open_files;
+@@global.rocksdb_max_open_files
+0
+CREATE TABLE t1(a INT) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(0),(1),(2),(3),(4);
+SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1;
+DROP TABLE t1;
+SELECT @@global.rocksdb_max_open_files;
+@@global.rocksdb_max_open_files
+-1
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files
+1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
index 27b1779627b..1fe61fe9fc5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result
@@ -36,7 +36,7 @@ explain select b, d from t where d > 4;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
rows_read
-1509
+1505
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -44,7 +44,7 @@ explain select a, b, c, d from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
@@ -58,13 +58,13 @@ explain select e from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select e from t where a = 5 and d <= 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where
rows_read
-251
+250
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -72,13 +72,13 @@ explain select a, b, c, d from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-51
+26
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -86,13 +86,13 @@ explain select e from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select e from t where a = 5 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where
rows_read
-251
+250
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -100,13 +100,13 @@ explain select a, b, c, d from t where a in (1, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index
rows_read
-502
+500
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-102
+52
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -114,13 +114,13 @@ explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index
rows_read
-753
+750
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-153
+78
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -128,13 +128,13 @@ explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index
rows_read
-204
+200
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-44
+24
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -142,13 +142,13 @@ explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) a
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index
rows_read
-765
+750
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-165
+90
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -156,13 +156,13 @@ explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY,b PRIMARY 8 const,const # Using where; Using index
rows_read
-51
+50
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan
rows_read
-11
+6
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=off';
@@ -170,7 +170,7 @@ explain select a+1, b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select a+1, b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
@@ -184,7 +184,7 @@ explain select b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index
rows_read
-251
+250
set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off';
explain select b, c, d from t where a = 5 and d < 3;
id select_type table type possible_keys key key_len ref rows Extra
@@ -204,7 +204,7 @@ explain select a, b, c, d from t where a = b and d >= 98;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan
rows_read
-9
+5
include/diff_tables.inc [temp_orig, temp_skip]
set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on';
set optimizer_switch = 'skip_scan=on';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
index 6586b92d129..28f965843aa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result
@@ -14,8 +14,13 @@ test t1 NULL BLOCK_READ_BYTE #
test t1 NULL BLOCK_READ_TIME #
test t1 NULL BLOCK_CHECKSUM_TIME #
test t1 NULL BLOCK_DECOMPRESS_TIME #
+test t1 NULL GET_READ_BYTES #
+test t1 NULL MULTIGET_READ_BYTES #
+test t1 NULL ITER_READ_BYTES #
test t1 NULL INTERNAL_KEY_SKIPPED_COUNT #
test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT #
+test t1 NULL INTERNAL_RECENT_SKIPPED_COUNT #
+test t1 NULL INTERNAL_MERGE_COUNT #
test t1 NULL GET_SNAPSHOT_TIME #
test t1 NULL GET_FROM_MEMTABLE_TIME #
test t1 NULL GET_FROM_MEMTABLE_COUNT #
@@ -23,9 +28,12 @@ test t1 NULL GET_POST_PROCESS_TIME #
test t1 NULL GET_FROM_OUTPUT_FILES_TIME #
test t1 NULL SEEK_ON_MEMTABLE_TIME #
test t1 NULL SEEK_ON_MEMTABLE_COUNT #
+test t1 NULL NEXT_ON_MEMTABLE_COUNT #
+test t1 NULL PREV_ON_MEMTABLE_COUNT #
test t1 NULL SEEK_CHILD_SEEK_TIME #
test t1 NULL SEEK_CHILD_SEEK_COUNT #
-test t1 NULL SEEK_IN_HEAP_TIME #
+test t1 NULL SEEK_MIN_HEAP_TIME #
+test t1 NULL SEEK_MAX_HEAP_TIME #
test t1 NULL SEEK_INTERNAL_SEEK_TIME #
test t1 NULL FIND_NEXT_USER_ENTRY_TIME #
test t1 NULL WRITE_WAL_TIME #
@@ -41,6 +49,12 @@ test t1 NULL NEW_TABLE_BLOCK_ITER_NANOS #
test t1 NULL NEW_TABLE_ITERATOR_NANOS #
test t1 NULL BLOCK_SEEK_NANOS #
test t1 NULL FIND_TABLE_NANOS #
+test t1 NULL BLOOM_MEMTABLE_HIT_COUNT #
+test t1 NULL BLOOM_MEMTABLE_MISS_COUNT #
+test t1 NULL BLOOM_SST_HIT_COUNT #
+test t1 NULL BLOOM_SST_MISS_COUNT #
+test t1 NULL KEY_LOCK_WAIT_TIME #
+test t1 NULL KEY_LOCK_WAIT_COUNT #
test t1 NULL IO_THREAD_POOL_ID #
test t1 NULL IO_BYTES_WRITTEN #
test t1 NULL IO_BYTES_READ #
@@ -59,8 +73,13 @@ BLOCK_READ_BYTE #
BLOCK_READ_TIME #
BLOCK_CHECKSUM_TIME #
BLOCK_DECOMPRESS_TIME #
+GET_READ_BYTES #
+MULTIGET_READ_BYTES #
+ITER_READ_BYTES #
INTERNAL_KEY_SKIPPED_COUNT #
INTERNAL_DELETE_SKIPPED_COUNT #
+INTERNAL_RECENT_SKIPPED_COUNT #
+INTERNAL_MERGE_COUNT #
GET_SNAPSHOT_TIME #
GET_FROM_MEMTABLE_TIME #
GET_FROM_MEMTABLE_COUNT #
@@ -68,9 +87,12 @@ GET_POST_PROCESS_TIME #
GET_FROM_OUTPUT_FILES_TIME #
SEEK_ON_MEMTABLE_TIME #
SEEK_ON_MEMTABLE_COUNT #
+NEXT_ON_MEMTABLE_COUNT #
+PREV_ON_MEMTABLE_COUNT #
SEEK_CHILD_SEEK_TIME #
SEEK_CHILD_SEEK_COUNT #
-SEEK_IN_HEAP_TIME #
+SEEK_MIN_HEAP_TIME #
+SEEK_MAX_HEAP_TIME #
SEEK_INTERNAL_SEEK_TIME #
FIND_NEXT_USER_ENTRY_TIME #
WRITE_WAL_TIME #
@@ -86,6 +108,12 @@ NEW_TABLE_BLOCK_ITER_NANOS #
NEW_TABLE_ITERATOR_NANOS #
BLOCK_SEEK_NANOS #
FIND_TABLE_NANOS #
+BLOOM_MEMTABLE_HIT_COUNT #
+BLOOM_MEMTABLE_MISS_COUNT #
+BLOOM_SST_HIT_COUNT #
+BLOOM_SST_MISS_COUNT #
+KEY_LOCK_WAIT_TIME #
+KEY_LOCK_WAIT_COUNT #
IO_THREAD_POOL_ID #
IO_BYTES_WRITTEN #
IO_BYTES_READ #
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
index 9b084e63cd5..1ad3ef620db 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -573,9 +573,6 @@ insert into t30 values
('row3', 'row3-key', 'row3-data'),
('row4', 'row4-key', 'row4-data'),
('row5', 'row5-key', 'row5-data');
-analyze table t30;
-Table Op Msg_type Msg_text
-test.t30 analyze status OK
explain
select * from t30 where key1 <='row3-key';
id select_type table type possible_keys key key_len ref rows Extra
@@ -865,7 +862,7 @@ ERROR 42S02: Unknown table 'test.t45'
# Now it fails if there is data overlap with what
# already exists
#
-show variables
+show variables where variable_name like 'rocksdb%' and variable_name not like 'rocksdb_max_open_files';
where
variable_name like 'rocksdb%' and
variable_name not like 'rocksdb_supported_compression_types';
@@ -875,6 +872,7 @@ rocksdb_advise_random_on_open ON
rocksdb_allow_concurrent_memtable_write OFF
rocksdb_allow_mmap_reads OFF
rocksdb_allow_mmap_writes OFF
+rocksdb_allow_to_start_after_corruption OFF
rocksdb_blind_delete_primary_key OFF
rocksdb_block_cache_size 536870912
rocksdb_block_restart_interval 16
@@ -894,7 +892,6 @@ rocksdb_compaction_sequential_deletes 0
rocksdb_compaction_sequential_deletes_count_sd OFF
rocksdb_compaction_sequential_deletes_file_size 0
rocksdb_compaction_sequential_deletes_window 0
-rocksdb_concurrent_prepare ON
rocksdb_create_checkpoint
rocksdb_create_if_missing ON
rocksdb_create_missing_column_families OFF
@@ -918,7 +915,6 @@ rocksdb_enable_ttl_read_filtering ON
rocksdb_enable_write_thread_adaptive_yield OFF
rocksdb_error_if_exists OFF
rocksdb_flush_log_at_trx_commit 0
-rocksdb_flush_memtable_on_analyze ON
rocksdb_force_compute_memtable_stats ON
rocksdb_force_compute_memtable_stats_cachetime 0
rocksdb_force_flush_memtable_and_lzero_now OFF
@@ -926,6 +922,7 @@ rocksdb_force_flush_memtable_now OFF
rocksdb_force_index_records_in_range 0
rocksdb_git_hash #
rocksdb_hash_index_allow_collision ON
+rocksdb_ignore_unknown_options ON
rocksdb_index_type kBinarySearch
rocksdb_info_log_level error_level
rocksdb_io_write_timeout 0
@@ -942,8 +939,7 @@ rocksdb_max_background_jobs 2
rocksdb_max_latest_deadlocks 5
rocksdb_max_log_file_size 0
rocksdb_max_manifest_file_size 18446744073709551615
-rocksdb_max_open_files -1
-rocksdb_max_row_locks 1073741824
+rocksdb_max_row_locks 1048576
rocksdb_max_subcompactions 1
rocksdb_max_total_wal_size 0
rocksdb_merge_buf_size 67108864
@@ -978,6 +974,7 @@ rocksdb_table_cache_numshardbits 6
rocksdb_table_stats_sampling_pct 10
rocksdb_tmpdir
rocksdb_trace_sst_api OFF
+rocksdb_two_write_queues ON
rocksdb_unsafe_for_binlog OFF
rocksdb_update_cf_options
rocksdb_use_adaptive_mutex OFF
@@ -1464,6 +1461,7 @@ Rocksdb_rows_read #
Rocksdb_rows_updated #
Rocksdb_rows_deleted_blind #
Rocksdb_rows_expired #
+rocksdb_rows_filtered #
Rocksdb_system_rows_deleted #
Rocksdb_system_rows_inserted #
Rocksdb_system_rows_read #
@@ -1475,6 +1473,11 @@ Rocksdb_queries_range #
Rocksdb_covered_secondary_key_lookups #
Rocksdb_block_cache_add #
Rocksdb_block_cache_data_hit #
+rocksdb_block_cache_add_failures #
+rocksdb_block_cache_bytes_read #
+rocksdb_block_cache_bytes_write #
+rocksdb_block_cache_data_add #
+rocksdb_block_cache_data_bytes_insert #
Rocksdb_block_cache_data_miss #
Rocksdb_block_cache_filter_hit #
Rocksdb_block_cache_filter_miss #
@@ -1482,12 +1485,18 @@ Rocksdb_block_cache_hit #
Rocksdb_block_cache_index_hit #
Rocksdb_block_cache_index_miss #
Rocksdb_block_cache_miss #
+rocksdb_block_cache_filter_add #
+rocksdb_block_cache_filter_bytes_evict #
+rocksdb_block_cache_filter_bytes_insert #
Rocksdb_block_cachecompressed_hit #
Rocksdb_block_cachecompressed_miss #
Rocksdb_bloom_filter_prefix_checked #
Rocksdb_bloom_filter_prefix_useful #
Rocksdb_bloom_filter_useful #
Rocksdb_bytes_read #
+rocksdb_block_cache_index_add #
+rocksdb_block_cache_index_bytes_evict #
+rocksdb_block_cache_index_bytes_insert #
Rocksdb_bytes_written #
Rocksdb_compact_read_bytes #
Rocksdb_compact_write_bytes #
@@ -1507,12 +1516,16 @@ Rocksdb_number_deletes_filtered #
Rocksdb_number_keys_read #
Rocksdb_number_keys_updated #
Rocksdb_number_keys_written #
+rocksdb_get_hit_l0 #
+rocksdb_get_hit_l1 #
+rocksdb_get_hit_l2_and_up #
Rocksdb_number_merge_failures #
Rocksdb_number_multiget_bytes_read #
Rocksdb_number_multiget_get #
Rocksdb_number_multiget_keys_read #
Rocksdb_number_reseeks_iteration #
Rocksdb_number_sst_entry_delete #
+rocksdb_iter_bytes_read #
Rocksdb_number_sst_entry_merge #
Rocksdb_number_sst_entry_other #
Rocksdb_number_sst_entry_put #
@@ -1521,6 +1534,12 @@ Rocksdb_number_stat_computes #
Rocksdb_number_superversion_acquires #
Rocksdb_number_superversion_cleanups #
Rocksdb_number_superversion_releases #
+rocksdb_number_db_next #
+rocksdb_number_db_next_found #
+rocksdb_number_db_prev #
+rocksdb_number_db_prev_found #
+rocksdb_number_db_seek #
+rocksdb_number_db_seek_found #
Rocksdb_rate_limit_delay_millis #
Rocksdb_snapshot_conflict_errors #
Rocksdb_stall_l0_file_count_limit_slowdowns #
@@ -1541,6 +1560,8 @@ Rocksdb_write_other #
Rocksdb_write_self #
Rocksdb_write_timedout #
Rocksdb_write_wal #
+rocksdb_row_lock_deadlocks #
+rocksdb_row_lock_wait_timeouts #
select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%';
VARIABLE_NAME
ROCKSDB_ROWS_DELETED
@@ -1549,6 +1570,7 @@ ROCKSDB_ROWS_READ
ROCKSDB_ROWS_UPDATED
ROCKSDB_ROWS_DELETED_BLIND
ROCKSDB_ROWS_EXPIRED
+ROCKSDB_ROWS_FILTERED
ROCKSDB_SYSTEM_ROWS_DELETED
ROCKSDB_SYSTEM_ROWS_INSERTED
ROCKSDB_SYSTEM_ROWS_READ
@@ -1559,11 +1581,22 @@ ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
+ROCKSDB_BLOCK_CACHE_ADD_FAILURES
+ROCKSDB_BLOCK_CACHE_BYTES_READ
+ROCKSDB_BLOCK_CACHE_BYTES_WRITE
+ROCKSDB_BLOCK_CACHE_DATA_ADD
+ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
+ROCKSDB_BLOCK_CACHE_FILTER_ADD
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_FILTER_HIT
ROCKSDB_BLOCK_CACHE_FILTER_MISS
ROCKSDB_BLOCK_CACHE_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_ADD
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_INDEX_HIT
ROCKSDB_BLOCK_CACHE_INDEX_MISS
ROCKSDB_BLOCK_CACHE_MISS
@@ -1580,7 +1613,11 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW
ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
ROCKSDB_COMPACTION_KEY_DROP_USER
ROCKSDB_FLUSH_WRITE_BYTES
+ROCKSDB_GET_HIT_L0
+ROCKSDB_GET_HIT_L1
+ROCKSDB_GET_HIT_L2_AND_UP
ROCKSDB_GETUPDATESSINCE_CALLS
+ROCKSDB_ITER_BYTES_READ
ROCKSDB_MEMTABLE_HIT
ROCKSDB_MEMTABLE_MISS
ROCKSDB_NO_FILE_CLOSES
@@ -1588,6 +1625,12 @@ ROCKSDB_NO_FILE_ERRORS
ROCKSDB_NO_FILE_OPENS
ROCKSDB_NUM_ITERATORS
ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED
+ROCKSDB_NUMBER_DB_NEXT
+ROCKSDB_NUMBER_DB_NEXT_FOUND
+ROCKSDB_NUMBER_DB_PREV
+ROCKSDB_NUMBER_DB_PREV_FOUND
+ROCKSDB_NUMBER_DB_SEEK
+ROCKSDB_NUMBER_DB_SEEK_FOUND
ROCKSDB_NUMBER_DELETES_FILTERED
ROCKSDB_NUMBER_KEYS_READ
ROCKSDB_NUMBER_KEYS_UPDATED
@@ -1602,11 +1645,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE
ROCKSDB_NUMBER_SST_ENTRY_OTHER
ROCKSDB_NUMBER_SST_ENTRY_PUT
ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE
-ROCKSDB_NUMBER_STAT_COMPUTES
ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES
ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
-ROCKSDB_RATE_LIMIT_DELAY_MILLIS
+ROCKSDB_ROW_LOCK_DEADLOCKS
+ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
@@ -1636,6 +1679,7 @@ ROCKSDB_ROWS_READ
ROCKSDB_ROWS_UPDATED
ROCKSDB_ROWS_DELETED_BLIND
ROCKSDB_ROWS_EXPIRED
+ROCKSDB_ROWS_FILTERED
ROCKSDB_SYSTEM_ROWS_DELETED
ROCKSDB_SYSTEM_ROWS_INSERTED
ROCKSDB_SYSTEM_ROWS_READ
@@ -1646,11 +1690,22 @@ ROCKSDB_QUERIES_POINT
ROCKSDB_QUERIES_RANGE
ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
+ROCKSDB_BLOCK_CACHE_ADD_FAILURES
+ROCKSDB_BLOCK_CACHE_BYTES_READ
+ROCKSDB_BLOCK_CACHE_BYTES_WRITE
+ROCKSDB_BLOCK_CACHE_DATA_ADD
+ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
+ROCKSDB_BLOCK_CACHE_FILTER_ADD
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_FILTER_HIT
ROCKSDB_BLOCK_CACHE_FILTER_MISS
ROCKSDB_BLOCK_CACHE_HIT
+ROCKSDB_BLOCK_CACHE_INDEX_ADD
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT
+ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT
ROCKSDB_BLOCK_CACHE_INDEX_HIT
ROCKSDB_BLOCK_CACHE_INDEX_MISS
ROCKSDB_BLOCK_CACHE_MISS
@@ -1667,7 +1722,11 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW
ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
ROCKSDB_COMPACTION_KEY_DROP_USER
ROCKSDB_FLUSH_WRITE_BYTES
+ROCKSDB_GET_HIT_L0
+ROCKSDB_GET_HIT_L1
+ROCKSDB_GET_HIT_L2_AND_UP
ROCKSDB_GETUPDATESSINCE_CALLS
+ROCKSDB_ITER_BYTES_READ
ROCKSDB_MEMTABLE_HIT
ROCKSDB_MEMTABLE_MISS
ROCKSDB_NO_FILE_CLOSES
@@ -1675,6 +1734,12 @@ ROCKSDB_NO_FILE_ERRORS
ROCKSDB_NO_FILE_OPENS
ROCKSDB_NUM_ITERATORS
ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED
+ROCKSDB_NUMBER_DB_NEXT
+ROCKSDB_NUMBER_DB_NEXT_FOUND
+ROCKSDB_NUMBER_DB_PREV
+ROCKSDB_NUMBER_DB_PREV_FOUND
+ROCKSDB_NUMBER_DB_SEEK
+ROCKSDB_NUMBER_DB_SEEK_FOUND
ROCKSDB_NUMBER_DELETES_FILTERED
ROCKSDB_NUMBER_KEYS_READ
ROCKSDB_NUMBER_KEYS_UPDATED
@@ -1689,11 +1754,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE
ROCKSDB_NUMBER_SST_ENTRY_OTHER
ROCKSDB_NUMBER_SST_ENTRY_PUT
ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE
-ROCKSDB_NUMBER_STAT_COMPUTES
ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES
ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
-ROCKSDB_RATE_LIMIT_DELAY_MILLIS
+ROCKSDB_ROW_LOCK_DEADLOCKS
+ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result
new file mode 100644
index 00000000000..a245fa851de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result
@@ -0,0 +1,11 @@
+#
+# Issue #728: Assertion `covers_key(b)' failed in int
+# myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&,
+# const rocksdb::Slice&)
+#
+CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT);
+INSERT INTO t2(c1)VALUES(0);
+SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC;
+c1 c2 c3
+0 NULL NULL
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
index d7a4f9dd065..10a6a02008e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result
@@ -7,5 +7,5 @@ count(*)
10000
explain select c1 from t1 where c1 > 5 limit 10;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range i i 9 NULL 9900 Using where; Using index
+1 SIMPLE t1 range i i 9 NULL # Using where; Using index
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
index 1bcd3692b4a..9fc5db98d7d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
@@ -83,12 +83,12 @@ FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT
WHERE TABLE_SCHEMA = 'test'
GROUP BY TABLE_NAME, PARTITION_NAME;
TABLE_SCHEMA TABLE_NAME PARTITION_NAME COUNT(STAT_TYPE)
-test t1 NULL 43
-test t2 NULL 43
-test t4 p0 43
-test t4 p1 43
-test t4 p2 43
-test t4 p3 43
+test t1 NULL 57
+test t2 NULL 57
+test t4 p0 57
+test t4 p1 57
+test t4 p2 57
+test t4 p3 57
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CF_OPTIONS;
CF_NAME OPTION_TYPE VALUE
__system__ COMPARATOR #
@@ -153,9 +153,15 @@ __system__ TABLE_FACTORY::BLOCK_SIZE #
__system__ TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
__system__ TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
__system__ TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+__system__ TABLE_FACTORY::METADATA_BLOCK_SIZE #
+__system__ TABLE_FACTORY::PARTITION_FILTERS #
+__system__ TABLE_FACTORY::USE_DELTA_ENCODING #
__system__ TABLE_FACTORY::FILTER_POLICY #
__system__ TABLE_FACTORY::WHOLE_KEY_FILTERING #
+__system__ TABLE_FACTORY::VERIFY_COMPRESSION #
+__system__ TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
__system__ TABLE_FACTORY::FORMAT_VERSION #
+__system__ TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
cf_t1 COMPARATOR #
cf_t1 MERGE_OPERATOR #
cf_t1 COMPACTION_FILTER #
@@ -218,9 +224,15 @@ cf_t1 TABLE_FACTORY::BLOCK_SIZE #
cf_t1 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
cf_t1 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
cf_t1 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+cf_t1 TABLE_FACTORY::METADATA_BLOCK_SIZE #
+cf_t1 TABLE_FACTORY::PARTITION_FILTERS #
+cf_t1 TABLE_FACTORY::USE_DELTA_ENCODING #
cf_t1 TABLE_FACTORY::FILTER_POLICY #
cf_t1 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+cf_t1 TABLE_FACTORY::VERIFY_COMPRESSION #
+cf_t1 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
cf_t1 TABLE_FACTORY::FORMAT_VERSION #
+cf_t1 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
default COMPARATOR #
default MERGE_OPERATOR #
default COMPACTION_FILTER #
@@ -283,9 +295,15 @@ default TABLE_FACTORY::BLOCK_SIZE #
default TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
default TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
default TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+default TABLE_FACTORY::METADATA_BLOCK_SIZE #
+default TABLE_FACTORY::PARTITION_FILTERS #
+default TABLE_FACTORY::USE_DELTA_ENCODING #
default TABLE_FACTORY::FILTER_POLICY #
default TABLE_FACTORY::WHOLE_KEY_FILTERING #
+default TABLE_FACTORY::VERIFY_COMPRESSION #
+default TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
default TABLE_FACTORY::FORMAT_VERSION #
+default TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
rev:cf_t2 COMPARATOR #
rev:cf_t2 MERGE_OPERATOR #
rev:cf_t2 COMPACTION_FILTER #
@@ -348,9 +366,15 @@ rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE #
rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
rev:cf_t2 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
rev:cf_t2 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+rev:cf_t2 TABLE_FACTORY::METADATA_BLOCK_SIZE #
+rev:cf_t2 TABLE_FACTORY::PARTITION_FILTERS #
+rev:cf_t2 TABLE_FACTORY::USE_DELTA_ENCODING #
rev:cf_t2 TABLE_FACTORY::FILTER_POLICY #
rev:cf_t2 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+rev:cf_t2 TABLE_FACTORY::VERIFY_COMPRESSION #
+rev:cf_t2 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT #
rev:cf_t2 TABLE_FACTORY::FORMAT_VERSION #
+rev:cf_t2 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION #
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
index 7642dcda43f..df048748e05 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
@@ -1,4 +1,19 @@
-CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+create table t1 (pk int primary key) engine=rocksdb;
+show tables;
set session debug_dbug="+d,gen_sql_table_name";
+Tables_in_test
+#mysql50#t1#sql-test
+t1
rename table t1 to t2;
set session debug_dbug= "-d,gen_sql_table_name";
+show tables;
+Tables_in_test
+#mysql50#t1#sql-test
+t2
+show tables;
+Tables_in_test
+create table t2 (pk int primary key) engine=rocksdb;
+show tables;
+Tables_in_test
+t2
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
index 1df6e838bcd..5bb3229de17 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
@@ -51,18 +51,30 @@ INSERT INTO t1 values (3);
INSERT INTO t1 values (5);
INSERT INTO t1 values (7);
set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+4
set global rocksdb_enable_ttl_read_filtering=0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
1
3
5
7
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
set global rocksdb_enable_ttl_read_filtering=1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+4
DROP TABLE t1;
CREATE TABLE t1 (
a int,
@@ -191,20 +203,36 @@ a
connection con2;
set global rocksdb_force_flush_memtable_now=1;
set global rocksdb_compact_cf='default';
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+1
# Switching to connection 1
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
connection con1;
SELECT * FROM t1;
a
1
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
UPDATE t1 set a = a + 1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
2
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+0
COMMIT;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
SELECT * FROM t1;
a
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
+variable_value-@a
+1
DROP TABLE t1;
disconnect con1;
disconnect con2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
index c9fa716dffc..a7e086fde66 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
@@ -1,4 +1,3 @@
-drop table if exists t1,t2;
#
# A basic test whether endspace-aware variable length encoding
# works when in PK
@@ -756,3 +755,16 @@ email_i 1
drop table t;
set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct;
set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums;
+drop table if exists t;
+Warnings:
+Note 1051 Unknown table 'test.t'
+create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb;
+insert into t(i,h) values('a','b');
+check table t;
+Table Op Msg_type Msg_text
+test.t check status OK
+alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null;
+check table t;
+Table Op Msg_type Msg_text
+test.t check status OK
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
index 3291826b290..f601f75d188 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
@@ -1,3 +1,12 @@
+Checking direct reads
+Checking direct writes
+Checking rocksdb_flush_log_at_trx_commit
+Validate flush_log settings when direct writes is enabled
+set global rocksdb_flush_log_at_trx_commit=0;
+set global rocksdb_flush_log_at_trx_commit=1;
+ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '1'
+set global rocksdb_flush_log_at_trx_commit=2;
+ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '2'
call mtr.add_suppression("rocksdb");
call mtr.add_suppression("Aborting");
# This shows that RocksDB plugin is loaded:
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
index ee23446eec0..d0a9b034927 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
@@ -3,6 +3,7 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+insert aaa(id, i) values(0,1);
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
@@ -16,11 +17,11 @@ insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
3
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
insert aaa(id, i) values(4,1);
-SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
insert aaa(id, i) values(5,1);
truncate table aaa;
drop table aaa;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
index 78ddbe60da5..3977b38d725 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test
@@ -65,7 +65,12 @@ ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
# disable duplicate index warning
--disable_warnings
# now do same index using copy algorithm
+# hitting max row locks (1M)
+--error ER_RDB_STATUS_GENERAL
ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=1;
+ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
+set session rocksdb_bulk_load=0;
--enable_warnings
# checksum testing
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
index 1f3ef49e534..18ccf2e39f6 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test
@@ -135,3 +135,15 @@ set global rocksdb_force_flush_memtable_now = true;
select * from t1;
DROP TABLE t1;
+
+## https://github.com/facebook/mysql-5.6/issues/736
+create table t1 (i int auto_increment, key(i)) engine=rocksdb;
+insert into t1 values();
+insert into t1 values();
+insert into t1 values();
+
+show create table t1;
+--source include/restart_mysqld.inc
+show create table t1;
+
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test
new file mode 100644
index 00000000000..1863c3247b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test
@@ -0,0 +1,75 @@
+--source include/have_rocksdb.inc
+--source include/not_valgrind.inc
+
+--echo #
+--echo # Test how MyRocks behaves when RocksDB reports corrupted data.
+--echo #
+
+--source include/have_debug.inc
+
+# use custom error log to assert on error message in search_pattern_in_file.inc
+--let LOG=$MYSQLTEST_VARDIR/tmp/allow_to_start_after_corruption_debug.err
+--let SEARCH_FILE=$LOG
+
+# restart server to change error log and ignore corruption on startup
+--let $_mysqld_option=--log-error=$LOG --rocksdb_allow_to_start_after_corruption=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Test server crashes on corrupted data and restarts
+--echo #
+create table t1 (
+ pk int not null primary key,
+ col1 varchar(10)
+) engine=rocksdb;
+
+insert into t1 values (1,1),(2,2),(3,3);
+
+select * from t1 where pk=1;
+set session debug= "+d,rocksdb_return_status_corrupted";
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+select * from t1 where pk=1;
+--source include/wait_until_disconnected.inc
+--let SEARCH_PATTERN=data corruption detected
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # The same for scan queries
+--echo #
+
+--source include/start_mysqld_with_option.inc
+select * from t1;
+set session debug= "+d,rocksdb_return_status_corrupted";
+--exec echo "wait" > $_expect_file_name
+--error 2013
+select * from t1;
+--source include/wait_until_disconnected.inc
+--let SEARCH_PATTERN=data corruption detected
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # Test restart failure. The server is shutdown at this point.
+--echo #
+
+# remove flag to ignore corruption
+--let $_mysqld_option=--log-error=$LOG
+--error 0
+--exec $MYSQLD_CMD $_mysqld_option
+--let SEARCH_PATTERN=The server will exit normally and stop restart attempts
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+--echo #
+--echo # Remove corruption file and restart cleanly
+--echo #
+
+--exec rm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/.rocksdb/ROCKSDB_CORRUPTED
+--source include/start_mysqld_with_option.inc
+
+drop table t1;
+
+# Restart mysqld with default options
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf
new file mode 100644
index 00000000000..f4257d80fdb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf
@@ -0,0 +1,8 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=1
+rpl_skip_tx_api=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test
new file mode 100644
index 00000000000..b4f329dd1e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test
@@ -0,0 +1,9 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+
+create table t (i int primary key auto_increment) engine=rocksdb;
+
+--source suite/rocksdb/include/autoinc_crash_safe.inc
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf
new file mode 100644
index 00000000000..f4257d80fdb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf
@@ -0,0 +1,8 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=1
+rpl_skip_tx_api=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test
new file mode 100644
index 00000000000..5b6a761dd94
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test
@@ -0,0 +1,10 @@
+--source include/have_rocksdb.inc
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+--source include/have_partition.inc
+
+create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3;
+
+--source suite/rocksdb/include/autoinc_crash_safe.inc
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt
new file mode 100644
index 00000000000..83ed8522e72
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt
@@ -0,0 +1 @@
+--binlog-format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test
new file mode 100644
index 00000000000..df67338a0f1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test
@@ -0,0 +1,118 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/have_log_bin.inc
+
+--echo #
+--echo # Testing upgrading from server without merges for auto_increment
+--echo # to new server with such support.
+--echo #
+
+set debug='+d,myrocks_autoinc_upgrade';
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+delete from t where i > 1;
+select * from t;
+
+select table_name, index_name, auto_increment
+ from information_schema.rocksdb_ddl where table_name = 't';
+
+set debug='-d,myrocks_autoinc_upgrade';
+
+--source include/restart_mysqld.inc
+
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+select table_name, index_name, auto_increment
+ from information_schema.rocksdb_ddl where table_name = 't';
+
+delete from t where i > 1;
+
+--source include/restart_mysqld.inc
+
+insert into t values ();
+insert into t values ();
+insert into t values ();
+select * from t;
+
+drop table t;
+
+--echo #
+--echo # Testing crash safety of transactions.
+--echo #
+create table t (i int primary key auto_increment);
+insert into t values ();
+insert into t values ();
+insert into t values ();
+
+--echo # Before anything
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_before";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+--echo # After engine prepare
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_after_prepare";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+--echo # After binlog
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_after_log";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+--echo # After everything
+begin;
+insert into t values ();
+insert into t values ();
+set debug="+d,crash_commit_after";
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+commit;
+--source include/wait_until_disconnected.inc
+--enable_reconnect
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't';
+select max(i) from t;
+
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
index 2fe0a2e3c08..b8968590155 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
@@ -64,4 +64,42 @@ SELECT LAST_INSERT_ID();
SELECT a FROM t1 ORDER BY a;
DROP TABLE t1;
+--echo #---------------------------
+--echo # test large autoincrement values
+--echo #---------------------------
+SET auto_increment_increment = 1;
+SET auto_increment_offset = 1;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+SET auto_increment_increment = 300;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
+
+SET auto_increment_offset = 200;
+CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (18446744073709551613, 'a');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'b');
+SHOW CREATE TABLE t1;
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL, 'c');
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test b/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test
deleted file mode 100644
index 375571f705d..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test
+++ /dev/null
@@ -1,3 +0,0 @@
---source include/have_rocksdb.inc
-
---echo # The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE.
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
index 6c6c75dd37e..0db5e6d9cc4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
@@ -7,4 +7,4 @@
--let pk_cf=cf1
--let data_order_desc=0
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test
new file mode 100644
index 00000000000..18e40fbf4ab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+
+INSERT INTO t1 VALUES (1);
+
+--connect (con1,localhost,root,,)
+DROP TABLE t1;
+
+--connection default
+--disconnect con1
+
+# This would have crashed the server prior to the fix
+SET rocksdb_bulk_load=0;
+--error ER_NO_SUCH_TABLE
+SELECT * FROM t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
index 4a4c42d1fcd..25b98c5ca17 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
@@ -1,7 +1,13 @@
--source include/have_rocksdb.inc
+--source include/count_sessions.inc
+
+--let LOG1=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.1.err
+--let $_mysqld_option=--log-error=$LOG1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
### Bulk load ###
-CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB;
# Make sure we get an error with out of order keys during bulk load
SET rocksdb_bulk_load=1;
@@ -21,18 +27,51 @@ INSERT INTO t1 VALUES(2);
INSERT INTO t1 VALUES(20);
INSERT INTO t1 VALUES(21);
---echo #
---echo # In MyRocks, the following statement will intentionally crash the server.
---echo # In MariaDB, it will cause an error
--error ER_OVERLAPPING_KEYS
SET rocksdb_bulk_load=0;
+SHOW VARIABLES LIKE 'rocksdb_bulk_load';
+call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+
--echo #
--echo # Despite the error, bulk load operation is over so the variable value
--echo # will be 0:
-select @@rocksdb_bulk_load;
+SELECT * FROM t1;
-call mtr.add_suppression('finalizing last SST file while setting bulk loading variable');
+--let SEARCH_FILE=$LOG1
+--let SEARCH_PATTERN=RocksDB: Error 198 finalizing last SST file while setting bulk loading variable
+--source include/search_pattern_in_file.inc
+
+--let LOG2=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.2.err
+--let $_mysqld_option=--log-error=$LOG2
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+--remove_file $LOG1
+
+
+# Make sure we get an error in log when we disconnect and do not assert the server
+--connect (con1,localhost,root,,)
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+--connection default
+--disconnect con1
+
+SELECT * FROM t1;
+
+--source include/wait_until_count_sessions.inc
+
+--let SEARCH_FILE=$LOG2
+--let SEARCH_PATTERN=RocksDB: Error 198 finalizing last SST file while disconnecting
+--source include/search_pattern_in_file.inc
+
+--let LOG3=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.3.err
+--let $_mysqld_option=--log-error=$LOG3
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+--remove_file $LOG2
TRUNCATE TABLE t1;
@@ -60,3 +99,46 @@ SELECT * FROM t1;
SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
DROP TABLE t1;
+
+# This would trigger a debug assertion that is just an error in release builds
+CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+--error ER_KEYS_OUT_OF_ORDER
+INSERT INTO t1 VALUES (),(),();
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+
+# Crash when table open cache closes handler with bulk load operation not finalized
+SET @orig_table_open_cache=@@global.table_open_cache;
+CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(13, 0);
+INSERT INTO t1 VALUES(2, 'test 2');
+INSERT INTO t1 VALUES(@id, @arg04);
+SET @@global.table_open_cache=FALSE;
+INSERT INTO t1 VALUES(51479+0.333333333,1);
+DROP TABLE t1;
+SET @@global.table_open_cache=@orig_table_open_cache;
+
+--let SEARCH_FILE=$LOG3
+--let SEARCH_PATTERN=RocksDB: Error 198 finalizing bulk load while closing handler
+--source include/search_pattern_in_file.inc
+
+--source include/restart_mysqld.inc
+
+--remove_file $LOG3
+
+# Switch between tables, but also introduce duplicate key errors
+CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1), (2);
+INSERT INTO t2 VALUES (1), (2);
+INSERT INTO t1 VALUES (1);
+--error ER_OVERLAPPING_KEYS
+INSERT INTO t2 VALUES (3);
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+DROP TABLE t2;
+
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
index 7c4d7aef0e5..67d68ac7a2d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
@@ -6,4 +6,4 @@
--let pk_cf=rev:cf1
--let data_order_desc=0
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
index a31e86753f3..7110fe5f1d7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
@@ -6,4 +6,4 @@
--let pk_cf=rev:cf1
--let data_order_desc=1
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
index f36990ed567..6c6e51a2a51 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
@@ -6,4 +6,4 @@
--let pk_cf=cf1
--let data_order_desc=1
---source bulk_load.inc
+--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
index 78bb9312ca5..f4d850d78b4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
@@ -3,136 +3,5 @@
--let pk_cf=cf1
---disable_warnings
-DROP TABLE IF EXISTS t1;
---enable_warnings
-
-SET rocksdb_bulk_load_size=3;
-SET rocksdb_bulk_load_allow_unsorted=1;
-
-### Test individual INSERTs ###
-
-# A table with only a PK won't have rows until the bulk load is finished
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-SET rocksdb_bulk_load=1;
---disable_query_log
-let $sign = 1;
-let $max = 5;
-let $i = 1;
-while ($i <= $max) {
- let $a = 1 + $sign * $i;
- let $b = 1 - $sign * $i;
- let $sign = -$sign;
- let $insert = INSERT INTO t1 VALUES ($a, $b);
- eval $insert;
- inc $i;
-}
---enable_query_log
-SELECT * FROM t1;
-SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
-DROP TABLE t1;
-
-# A table with a PK and a SK shows rows immediately
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b));
-SET rocksdb_bulk_load=1;
---disable_query_log
-let $sign = 1;
-let $max = 5;
-let $i = 1;
-while ($i <= $max) {
- let $a = 1 + $sign * $i;
- let $b = 1 - $sign * $i;
- let $sign = -$sign;
- let $insert = INSERT INTO t1 VALUES ($a, $b);
- eval $insert;
- inc $i;
-}
---enable_query_log
-
-SELECT * FROM t1;
-SET rocksdb_bulk_load=0;
-DROP TABLE t1;
-
-# Inserting into another table finishes bulk load to the previous table
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-
-SET rocksdb_bulk_load=1;
-INSERT INTO t1 VALUES (1,1);
-INSERT INTO t2 VALUES (1,1);
-SELECT * FROM t1;
-INSERT INTO t1 VALUES (2,2);
-SELECT * FROM t2;
-SELECT * FROM t1;
-SET rocksdb_bulk_load=0;
-SELECT * FROM t1;
-DROP TABLE t1, t2;
-
-### Test bulk load from a file ###
-eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
-eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf");
-eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
- PARTITION BY KEY() PARTITIONS 4;
-
---let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
-# Create a text file with data to import into the table.
-# PK and SK are not in any order
---let ROCKSDB_INFILE = $file
-perl;
-my $fn = $ENV{'ROCKSDB_INFILE'};
-open(my $fh, '>', $fn) || die "perl open($fn): $!";
+--source ../include/bulk_load_unsorted.inc
binmode $fh;
-my $max = 5000000;
-my $sign = 1;
-for (my $ii = 0; $ii < $max; $ii++)
-{
- my $a = 1 + $sign * $ii;
- my $b = 1 - $sign * $ii;
- print $fh "$a\t$b\n";
-}
-close($fh);
-EOF
---file_exists $file
-
-# Make sure a snapshot held by another user doesn't block the bulk load
-connect (other,localhost,root,,);
-set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-
-connection default;
-set rocksdb_bulk_load=1;
-set rocksdb_bulk_load_size=100000;
---disable_query_log
---echo LOAD DATA INFILE <input_file> INTO TABLE t1;
-eval LOAD DATA INFILE '$file' INTO TABLE t1;
---echo LOAD DATA INFILE <input_file> INTO TABLE t2;
-eval LOAD DATA INFILE '$file' INTO TABLE t2;
---echo LOAD DATA INFILE <input_file> INTO TABLE t3;
-eval LOAD DATA INFILE '$file' INTO TABLE t3;
---enable_query_log
-set rocksdb_bulk_load=0;
-
---remove_file $file
-
-# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
-SHOW TABLE STATUS WHERE name LIKE 't%';
-
-ANALYZE TABLE t1, t2, t3;
-
---replace_column 6 # 7 # 8 # 9 #
-SHOW TABLE STATUS WHERE name LIKE 't%';
-
-# Make sure all the data is there.
-select count(a) from t1;
-select count(b) from t1;
-select count(a) from t2;
-select count(b) from t2;
-select count(a) from t3;
-select count(b) from t3;
-
-DROP TABLE t1, t2, t3;
-SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test
new file mode 100644
index 00000000000..de9a5c26424
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=rev:cf1
+
+--source ../include/bulk_load_unsorted.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
index 689753faf8d..14a82d7e462 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test
@@ -2,6 +2,48 @@
--source include/restart_mysqld.inc
+# Test memtable cardinality statistics
+CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb;
+
+# populate the table with 10 records where cardinality of id is N and a is N/2.
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4),
+(5, 4),(6, 4),(7, 4),(8, 4),(9, 4);
+
+# Assert no cardinality data exists before ANALYZE TABLE is done
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="id";
+SELECT cardinality FROM information_schema.statistics where table_name="t0" and
+column_name="a";
+
+--disable_result_log
+ANALYZE TABLE t0;
+--enable_result_log
+
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+
+# Flush the table and re-run the test as statistics is calculated a bit
+# differently for memtable and SST files
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+--disable_result_log
+ANALYZE TABLE t0;
+--enable_result_log
+
+SELECT table_rows into @N FROM information_schema.tables
+WHERE table_name = "t0";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="id";
+SELECT FLOOR(@N/cardinality) FROM
+information_schema.statistics where table_name="t0" and column_name="a";
+
+drop table t0;
+
+# Test big table on SST
+
--disable_warnings
DROP TABLE IF EXISTS t1,t10,t11;
--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test
new file mode 100644
index 00000000000..25213544bb5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test
@@ -0,0 +1,21 @@
+--disable_warnings
+let $MYSQLD_DATADIR= `select @@datadir`;
+let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err;
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
+
+--exec find $MYSQLD_DATADIR/.rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}"
+
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--shutdown_server 10
+--error 1
+--exec $MYSQLD_CMD --rocksdb_ignore_unknown_options=0 --loose-console > $error_log 2>&1
+
+let SEARCH_FILE= $error_log;
+let SEARCH_PATTERN= RocksDB: Compatibility check against existing database options failed;
+--source include/search_pattern_in_file.inc
+--enable_reconnect
+--exec echo "restart" > $restart_file
+--source include/wait_until_connected_again.inc
+--exec find $MYSQLD_DATADIR/.rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i '/hello=world/d' {}"
+select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options";
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
index d2abcb3b63b..9677d2dbbaa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
@@ -21,29 +21,29 @@ let $con3= `SELECT CONNECTION_ID()`;
connection default;
eval create table t (i int primary key) engine=$engine;
insert into t values (1), (2), (3);
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
echo Deadlock #1;
--source include/simple_deadlock.inc
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
echo Deadlock #2;
--source include/simple_deadlock.inc
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
set global rocksdb_max_latest_deadlocks = 10;
echo Deadlock #3;
--source include/simple_deadlock.inc
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
set global rocksdb_max_latest_deadlocks = 1;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
connection con3;
@@ -77,8 +77,10 @@ let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
where thread_id = $con2 and waiting_key != "";
--source include/wait_condition.inc
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
--error ER_LOCK_DEADLOCK
select * from t where i=1 for update;
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
rollback;
connection con2;
@@ -91,7 +93,7 @@ rollback;
connection default;
set global rocksdb_max_latest_deadlocks = 5;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
echo Deadlock #5;
@@ -133,7 +135,7 @@ connection con3;
rollback;
connection default;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/
show engine rocksdb transaction status;
disconnect con1;
@@ -143,11 +145,11 @@ disconnect con3;
set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
set global rocksdb_deadlock_detect = @prior_deadlock_detect;
drop table t;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
show engine rocksdb transaction status;
set global rocksdb_max_latest_deadlocks = 0;
--echo # Clears deadlock buffer of any existent deadlocks.
set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
show engine rocksdb transaction status;
--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
index 7dc3c207ecc..e1c04980c15 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test
@@ -7,6 +7,7 @@
--disable_warnings
DROP TABLE IF EXISTS is_ddl_t1;
DROP TABLE IF EXISTS is_ddl_t2;
+DROP TABLE IF EXISTS is_ddl_t3;
--enable_warnings
CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT,
@@ -17,8 +18,12 @@ CREATE TABLE is_ddl_t2 (x INT, y INT, z INT,
PRIMARY KEY (z, y) COMMENT 'zy_cf',
KEY (x)) ENGINE = ROCKSDB;
--sorted_result
-SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
+CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB
+ COMMENT "ttl_duration=3600;";
+
+SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
# cleanup
DROP TABLE is_ddl_t1;
DROP TABLE is_ddl_t2;
+DROP TABLE is_ddl_t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test
new file mode 100644
index 00000000000..21558899782
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test
@@ -0,0 +1,158 @@
+--source include/have_rocksdb.inc
+
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+
+# needed by simple_deadlock.inc
+let $engine = rocksdb;
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+show create table information_schema.rocksdb_deadlock;
+
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY 6 INDEX_NAME 7 TABLE_NAME
+select * from information_schema.rocksdb_deadlock;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY
+select * from information_schema.rocksdb_deadlock;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
index abf8d71911b..887b4dd6a65 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
@@ -70,14 +70,15 @@ while ($cnt)
SELECT COUNT(*) FROM t1;
+# flush the table first, as statistics are calculated a bit differently for memtable and SST files
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+
-- disable_query_log
-- disable_result_log
ANALYZE TABLE t1;
-- enable_result_log
-- enable_query_log
-SET GLOBAL rocksdb_force_flush_memtable_now = 1;
-
--replace_column 9 #
EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100;
UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
@@ -95,8 +96,8 @@ while ($i <= 1000) {
eval $insert;
}
--enable_query_log
-analyze table t1;
set global rocksdb_force_flush_memtable_now=1;
+analyze table t1;
--replace_column 9 #
explain select * from t1 where key1 = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
index a4d26cf7739..2306558ff41 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
@@ -5,7 +5,8 @@
# t/index_merge_innodb.test
#
-# Index merge tests
+# Index merge tests (the test is called 'index_merge_rocksdb2' because
+# 'index_merge_rocksdb' already existed when 'index_merge_innodb' was copied)
#
# Last update:
# 2006-08-07 ML test refactored (MySQL 5.1)
@@ -61,6 +62,7 @@ INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1;
-- disable_query_log
-- disable_result_log
+set global rocksdb_force_flush_memtable_now=1;
analyze table t1;
-- enable_result_log
-- enable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt
index a0bf5759ec4..3b7d80662db 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt
@@ -1,4 +1,5 @@
--rocksdb_write_disable_wal=1
+--rocksdb_flush_log_at_trx_commit=0
--rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=256;level0_stop_writes_trigger=256;max_write_buffer_number=16;compression_per_level=kNoCompression;memtable=vector:1024
--rocksdb_override_cf_options=__system__={memtable=skip_list:16}
--rocksdb_compaction_sequential_deletes=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
index 4f1927d366c..d75f1e3c2a8 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
@@ -8,6 +8,38 @@ SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SELECT * FROM t1;
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+DROP TABLE t1;
+
+CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+
+INSERT INTO t1 VALUES (5);
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+INSERT INTO t1 VALUES (1000);
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SELECT * FROM t1;
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
index f1777ea3e93..5288680c3bd 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
@@ -16,16 +16,20 @@ set @@rocksdb_lock_wait_timeout=1;
begin;
--connection con1
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--error ER_LOCK_WAIT_TIMEOUT
insert into t values(0);
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--connection con2
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--error ER_LOCK_WAIT_TIMEOUT
insert into t values(0);
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts';
--disconnect con1
--connection default
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test
new file mode 100644
index 00000000000..c7c5e7b2ef3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test
@@ -0,0 +1,53 @@
+--source include/have_rocksdb.inc
+
+# Basic Sysbench run fails with basic MyROCKS install due to lack of open files
+
+# test for over limit
+CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*");
+
+--let $over_rocksdb_max_open_files=`SELECT @@global.open_files_limit + 100`
+--let $under_rocksdb_max_open_files=`SELECT @@global.open_files_limit -1`
+--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rocksdb.max_open_files.err
+--let SEARCH_PATTERN=RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR $over_rocksdb_max_open_files over_rocksdb_max_open_files
+--let $_mysqld_option=--log-error=$SEARCH_FILE --rocksdb_max_open_files=$over_rocksdb_max_open_files
+--source include/restart_mysqld_with_option.inc
+--source include/search_pattern_in_file.inc
+
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+
+# test for within limit
+--let $_mysqld_option=--rocksdb_max_open_files=$under_rocksdb_max_open_files
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files;
+
+# test for minimal value
+--let $_mysqld_option=--rocksdb_max_open_files=0
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.rocksdb_max_open_files;
+
+# verify that we can still do work with no descriptor cache
+CREATE TABLE t1(a INT) ENGINE=ROCKSDB;
+INSERT INTO t1 VALUES(0),(1),(2),(3),(4);
+SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1;
+DROP TABLE t1;
+
+# test for unlimited
+--let $_mysqld_option=--rocksdb_max_open_files=-1
+--source include/restart_mysqld_with_option.inc
+
+SELECT @@global.rocksdb_max_open_files;
+
+# test for auto-tune
+--let $_mysqld_option=--rocksdb_max_open_files=-2
+--source include/restart_mysqld_with_option.inc
+
+SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files;
+
+# cleanup
+--let _$mysqld_option=
+--source include/restart_mysqld.inc
+--remove_file $SEARCH_FILE
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
index 9e904908330..4947ffb59b8 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
@@ -32,7 +32,7 @@ BEGIN;
insert into r1 values (5,5,5,5,5,5,5,5);
update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1';
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test
rollback;
@@ -44,16 +44,16 @@ source include/search_pattern_in_file.inc;
set @save_default_storage_engine=@@global.default_storage_engine;
SET GLOBAL default_storage_engine=rocksdb;
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test
source include/search_pattern_in_file.inc;
# Sanity test mysqldump when the --innodb-stats-on-metadata is specified (no effect)
--echo ==== mysqldump with --innodb-stats-on-metadata ====
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test
# testing mysqldump work with statement based binary logging
SET GLOBAL binlog_format=statement;
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null
SET GLOBAL binlog_format=row;
drop table r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
index 3631e703de6..ca9eb5d2ecf 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test
@@ -29,7 +29,7 @@ let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_block_cache_add';
---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null
# verifying block cache was not filled
select case when variable_value - @a > 20 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
index 9199c572933..345e29e8df2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -574,7 +574,6 @@ insert into t30 values
('row3', 'row3-key', 'row3-data'),
('row4', 'row4-key', 'row4-data'),
('row5', 'row5-key', 'row5-data');
-analyze table t30;
--replace_column 9 #
explain
@@ -786,11 +785,16 @@ drop table t45;
--echo # Now it fails if there is data overlap with what
--echo # already exists
--echo #
+# We exclude rocksdb_max_open_files here because its value depends on
+# the value of the server's open_files_limit and is expected to differ
+# across distros and installs
+show variables where variable_name like 'rocksdb%' and variable_name not like 'rocksdb_max_open_files';
--replace_regex /[a-f0-9]{40}/#/
show variables
where
variable_name like 'rocksdb%' and
+ variable_name not like 'rocksdb_max_open_files' and
variable_name not like 'rocksdb_supported_compression_types';
create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test
new file mode 100644
index 00000000000..7cd4e09e946
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test
@@ -0,0 +1,14 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+--echo #
+--echo # Issue #728: Assertion `covers_key(b)' failed in int
+--echo # myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&,
+--echo # const rocksdb::Slice&)
+--echo #
+
+CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT);
+INSERT INTO t2(c1)VALUES(0);
+SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC;
+DROP TABLE t2;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
index 6b8d0b90e90..a7ac236451e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test
@@ -15,6 +15,7 @@ while ($i<10000)
--enable_query_log
analyze table t1;
select count(*) from t1;
+--replace_column 9 #
explain select c1 from t1 where c1 > 5 limit 10;
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
index 945b0079cce..80c366d26b0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
@@ -1,39 +1,36 @@
--source include/have_rocksdb.inc
--source include/have_debug.inc
-# Write file to make mysql-test-run.pl expect the "crash", but don't restart the
-# server until it is told to
--let $_server_id= `SELECT @@server_id`
---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
-CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+create table t1 (pk int primary key) engine=rocksdb;
# Create a .frm file without a matching table
--exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm
-# Restart the server with a .frm file exist but that table is not registered in RocksDB
---exec echo "wait" >$_expect_file_name
-shutdown_server 10;
---exec echo "restart" >$_expect_file_name
---sleep 5
---enable_reconnect
---source include/wait_until_connected_again.inc
---disable_reconnect
+--source include/restart_mysqld.inc
+
+show tables;
# This will append '#sql-test' to the end of new name
set session debug_dbug="+d,gen_sql_table_name";
rename table t1 to t2;
set session debug_dbug= "-d,gen_sql_table_name";
+show tables;
+
# Remove the corresponding .frm files
--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm
--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm
# Restart the server with a table registered in RocksDB but does not have a .frm file
---exec echo "wait" >$_expect_file_name
-shutdown_server 10;
---exec echo "restart" >$_expect_file_name
---sleep 5
---enable_reconnect
---source include/wait_until_connected_again.inc
---disable_reconnect
+--source include/restart_mysqld.inc
+
+show tables;
+
+# try to recreate a table with the same name
+create table t2 (pk int primary key) engine=rocksdb;
+
+show tables;
+
+drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
index 5a694b7b222..4e8b081c4d5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
@@ -78,22 +78,28 @@ INSERT INTO t1 values (7);
set global rocksdb_debug_ttl_rec_ts = 0;
# should return nothing.
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
# disable filtering
set global rocksdb_enable_ttl_read_filtering=0;
# should return everything
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
-# disable filtering
+# enable filtering
set global rocksdb_enable_ttl_read_filtering=1;
# should return nothing.
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
DROP TABLE t1;
@@ -286,28 +292,37 @@ SELECT * FROM t1; # <= shouldn't be filtered out here
--echo # Switching to connection 2
connection con2;
-# compaction doesn't do anythign since con1 snapshot is still open
+# compaction doesn't do anything since con1 snapshot is still open
set global rocksdb_force_flush_memtable_now=1;
set global rocksdb_compact_cf='default';
# read filtered out, because on a different connection, on
# this connection the records have 'expired' already so they are filtered out
# even though they have not yet been removed by compaction
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--echo # Switching to connection 1
connection con1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1; # <= shouldn't be filtered out here
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
UPDATE t1 set a = a + 1;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result
SELECT * FROM t1; # <= shouldn't be filtered out here
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
COMMIT;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
--sorted_result # <= filtered out here because time has passed.
SELECT * FROM t1;
+select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered';
DROP TABLE t1;
disconnect con1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
index e45b6836f67..b631615c266 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test
@@ -1,9 +1,5 @@
--source include/have_rocksdb.inc
---disable_warnings
-drop table if exists t1,t2;
---enable_warnings
-
#
# VARCHAR column types
#
@@ -73,3 +69,14 @@ select 'email_i' as index_name, count(*) AS count from t force index(email_i);
drop table t;
set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct;
set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums;
+
+# Issue #784 - Skip trailing space bytes for non-unpackable fields
+
+drop table if exists t;
+create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb;
+insert into t(i,h) values('a','b');
+check table t;
+alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null;
+check table t;
+drop table t;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
index 7a053c659b2..550cbd2753b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
@@ -11,37 +11,52 @@ select plugin_name, plugin_type from information_schema.plugins where plugin_nam
# caused an assertion in RocksDB. Now it should not be allowed and ROCKSDB
# plugin will not load in such configuration.
#
-# We want the server to still start, so we specify default-storage-engine=myisam
+--let LOG=$MYSQLTEST_VARDIR/tmp/use_direct_reads_writes.err
+--let SEARCH_FILE=$LOG
---let $_mysqld_option=--rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1 --default-storage-engine=myisam
---source include/restart_mysqld_with_option.inc
+--echo Checking direct reads
+--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
---echo # Check that ROCKSDB plugin is not loaded:
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+--let SEARCH_PATTERN=enable both use_direct_reads
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
---echo # Check that MyRocks has printed an error message into server error log:
-let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err;
-let SEARCH_PATTERN=enable both use_direct_reads;
-source include/search_pattern_in_file.inc;
---echo # Now, restart the server back with regular settings
---source include/restart_mysqld.inc
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+# Repeat with direct-writes
+--echo Checking direct writes
+--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
---echo #
---echo # Now, repeat the same with another set of invalid arguments
---echo #
---let $_mysqld_option=--rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1 --default-storage-engine=myisam
---source include/restart_mysqld_with_option.inc
+--let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
---echo # Check that ROCKSDB plugin is not loaded:
select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
-let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err;
-let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction;
-source include/search_pattern_in_file.inc;
+# Verify that the invalid combination of mmap writes (--rocksdb_allow_mmap_writes) and --rocksdb_flush_log_at_trx_commit=1 fails at startup
+--echo Checking rocksdb_flush_log_at_trx_commit
+--let $_mysqld_option=--log-error=$LOG --rocksdb_flush_log_at_trx_commit=1 --rocksdb_allow_mmap_writes=1
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_invalid_option.inc
---echo # Now, restart the server back with regular settings
---source include/restart_mysqld.inc
-select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB';
+--let SEARCH_PATTERN=rocksdb_flush_log_at_trx_commit needs to be
+--source include/search_pattern_in_file.inc
+--remove_file $LOG
+
+
+# Verify that rocksdb_flush_log_at_trx_commit can only be set to 0 when mmap writes (--rocksdb_allow_mmap_writes) are enabled
+--echo Validate flush_log settings when direct writes is enabled
+--let $_mysqld_option=--rocksdb_flush_log_at_trx_commit=0 --rocksdb_allow_mmap_writes=1
+--source include/restart_mysqld_with_option.inc
+
+set global rocksdb_flush_log_at_trx_commit=0;
+--error ER_WRONG_VALUE_FOR_VAR
+set global rocksdb_flush_log_at_trx_commit=1;
+--error ER_WRONG_VALUE_FOR_VAR
+set global rocksdb_flush_log_at_trx_commit=2;
+# Cleanup
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
index c20bb1fc89c..e97a0b0bcc9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
@@ -7,7 +7,8 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
---exec sleep 5
+insert aaa(id, i) values(0,1);
+
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
@@ -16,18 +17,16 @@ select variable_value-@a from information_schema.global_status where variable_na
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
---exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
insert aaa(id, i) values(4,1);
let $status_var=rocksdb_wal_synced;
let $status_var_value=`select @a+1`;
source include/wait_for_status_var.inc;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
---exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
insert aaa(id, i) values(5,1);
let $status_var=rocksdb_wal_synced;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
index 8f03c16e2f1..d983bdf8b58 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc
@@ -30,6 +30,7 @@ INSERT INTO t1 VALUES(1, 1);
connection slave;
--let $slave_sql_errno= 1062
--let $not_switch_connection= 0
+--let $slave_timeout= 120
--source include/wait_for_slave_sql_error_and_skip.inc
set global reset_seconds_behind_master=0;
--source include/stop_slave_io.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
index 3d734c9498d..89e93f6b8f0 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
@@ -10,6 +10,7 @@ insert into r1 values (1, 1000);
set global rocksdb_force_flush_memtable_now=1;
include/rpl_start_server.inc [server_number=2]
include/start_slave.inc
+insert into r1 values (2,2000);
delete r1 from r1 force index (i) where id2=1000;
select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
id1 id2
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
index 6143824eea6..ff484171213 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
@@ -62,6 +62,7 @@ SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
+--error 0,2013
SET DEBUG_SYNC='now SIGNAL go';
--source include/wait_until_disconnected.inc
--enable_reconnect
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
index 9180afa881f..6d953ead4e9 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
@@ -53,8 +53,14 @@ EOF
--source include/rpl_start_server.inc
--source include/start_slave.inc
+
+# Due to the binlogs being truncated, the slave may still think it's processed up to
+# the truncated binlog and select master_pos_wait() can return prematurely. Add
+# a new transaction to the master to force master_pos_wait() to wait.
connection master;
+insert into r1 values (2,2000);
sync_slave_with_master;
+
connection slave;
delete r1 from r1 force index (i) where id2=1000;
select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
index 3d76e035e05..9f161b18c05 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result
@@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
stop slave;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
index 3d76e035e05..9f161b18c05 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result
@@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
stop slave;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
index 20098f49b42..c1d3e7fb81c 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py
@@ -95,6 +95,8 @@ LOADERS_READY = 0
REQUEST_ID = 1
REQUEST_ID_LOCK = threading.Lock()
+INSERT_ID_SET = set()
+
def get_next_request_id():
global REQUEST_ID
with REQUEST_ID_LOCK:
@@ -302,10 +304,19 @@ class PopulateWorker(WorkerThread):
execute(self.cur, stmt)
if i % 101 == 0:
self.con.commit()
+ check_id(self.con.insert_id())
self.con.commit()
+ check_id(self.con.insert_id())
logging.info("Inserted %d rows starting at id %d" %
(self.num_to_add, self.start_id))
+def check_id(id):
+ if id == 0:
+ return
+ if id in INSERT_ID_SET:
+ raise Exception("Duplicate auto_inc id %d" % id)
+ INSERT_ID_SET.add(id)
+
def populate_table(num_records):
logging.info("Populate_table started for %d records" % num_records)
@@ -422,6 +433,7 @@ class LoadGenWorker(WorkerThread):
execute(self.cur, gen_insert(self.table, idx, self.thread_id,
request_id, 0))
self.con.commit()
+ check_id(self.con.insert_id())
self.id_map.append(request_id)
@@ -687,6 +699,7 @@ class LoadGenWorker(WorkerThread):
else:
self.cur_txn_state = self.TXN_COMMIT_STARTED
self.con.commit()
+ check_id(self.con.insert_id())
if not self.con.get_server_info():
raise MySQLdb.OperationalError(MySQLdb.constants.CR.CONNECTION_ERROR,
"Possible connection error on commit")
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
index 7d92bb3f83a..307211a124d 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test
@@ -17,6 +17,8 @@ CREATE TABLE t1(id INT PRIMARY KEY,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+ auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
index 6f6128579b5..8ef4c73c3b0 100644
--- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
+++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test
@@ -18,6 +18,8 @@ CREATE TABLE t1(id INT PRIMARY KEY,
msg VARCHAR(1024),
msg_length int,
msg_checksum varchar(128),
+ auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
+ KEY(auto_inc),
KEY msg_i(msg(255), zero_sum))
ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result
new file mode 100644
index 00000000000..086010dc79e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION = 444;
+ERROR HY000: Variable 'rocksdb_allow_to_start_after_corruption' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
index ede02afcb60..9af4f730a21 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result
@@ -1,7 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
SET @start_global_value = @@global.ROCKSDB_BYTES_PER_SYNC;
SELECT @start_global_value;
@start_global_value
0
-"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_BYTES_PER_SYNC = 444;
-ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a read only variable
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 100"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 100;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 1"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 1;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 0"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 0;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@session.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_BYTES_PER_SYNC = 444;
+ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'aaa'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'bbb'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '-1'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '101'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '484436'"
+SET @@global.ROCKSDB_BYTES_PER_SYNC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+SET @@global.ROCKSDB_BYTES_PER_SYNC = @start_global_value;
+SELECT @@global.ROCKSDB_BYTES_PER_SYNC;
+@@global.ROCKSDB_BYTES_PER_SYNC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result
deleted file mode 100644
index 905feec9b1a..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result
+++ /dev/null
@@ -1,58 +0,0 @@
-drop table if exists t1;
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-Table Create Table
-t1 CREATE TABLE `t1` (
- `a` int(11) NOT NULL AUTO_INCREMENT,
- `b` int(11) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
-SELECT * FROM t1;
-a b
-1 1
-2 2
-3 3
-set session rocksdb_flush_memtable_on_analyze=off;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status OK
-SHOW INDEXES FROM t1;
-Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
-t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE
-set session rocksdb_flush_memtable_on_analyze=on;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status OK
-SHOW INDEXES FROM t1;
-Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
-t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE
-DROP TABLE t1;
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-Table Create Table
-t1 CREATE TABLE `t1` (
- `a` int(11) NOT NULL AUTO_INCREMENT,
- `b` int(11) DEFAULT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
-SELECT * FROM t1;
-a b
-1 1
-2 2
-3 3
-SHOW TABLE STATUS LIKE 't1';
-Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status OK
-SHOW TABLE STATUS LIKE 't1';
-Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
-DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result
new file mode 100644
index 00000000000..621213cd79b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS = 444;
+ERROR HY000: Variable 'rocksdb_ignore_unknown_options' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
index b058ebf05f8..60f505310c6 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result
@@ -1,7 +1,3 @@
-SET @start_global_value = @@global.ROCKSDB_MAX_OPEN_FILES;
-SELECT @start_global_value;
-@start_global_value
--1
-"Trying to set variable @@global.ROCKSDB_MAX_OPEN_FILES to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_MAX_OPEN_FILES = 444;
-ERROR HY000: Variable 'rocksdb_max_open_files' is a read only variable
+show variables like 'rocksdb_max_open_files';
+Variable_name Value
+rocksdb_max_open_files #
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
index e417e4d5c4e..c925a68d4ed 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result
@@ -6,11 +6,11 @@ INSERT INTO invalid_values VALUES('\'aaa\'');
SET @start_global_value = @@global.ROCKSDB_MAX_ROW_LOCKS;
SELECT @start_global_value;
@start_global_value
-1073741824
+1048576
SET @start_session_value = @@session.ROCKSDB_MAX_ROW_LOCKS;
SELECT @start_session_value;
@start_session_value
-1073741824
+1048576
'# Setting to valid values in global scope#'
"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1"
SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1;
@@ -21,7 +21,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1024"
SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1024;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@ -31,7 +31,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
'# Setting to valid values in session scope#'
"Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1"
SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1;
@@ -42,7 +42,7 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@session.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
"Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1024"
SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1024;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@ -52,21 +52,21 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@session.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
'# Testing with invalid values in global scope #'
"Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 'aaa'"
SET @@global.ROCKSDB_MAX_ROW_LOCKS = 'aaa';
Got one of the listed errors
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
SET @@global.ROCKSDB_MAX_ROW_LOCKS = @start_global_value;
SELECT @@global.ROCKSDB_MAX_ROW_LOCKS;
@@global.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
SET @@session.ROCKSDB_MAX_ROW_LOCKS = @start_session_value;
SELECT @@session.ROCKSDB_MAX_ROW_LOCKS;
@@session.ROCKSDB_MAX_ROW_LOCKS
-1073741824
+1048576
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result
index 11d4f2363f6..5a19016bf91 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result
@@ -3,12 +3,12 @@ INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(1024);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
-SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE;
+SET @start_global_value = @@global.ROCKSDB_TWO_WRITE_QUEUES;
SELECT @start_global_value;
@start_global_value
1
-"Trying to set variable @@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444;
-ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable
+"Trying to set variable @@global.ROCKSDB_TWO_WRITE_QUEUES to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TWO_WRITE_QUEUES = 444;
+ERROR HY000: Variable 'rocksdb_two_write_queues' is a read only variable
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result
new file mode 100644
index 00000000000..126b4cffe8b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result
@@ -0,0 +1,38 @@
+CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS="";
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf2={write_buffer_size=8m;target_file_size_base=2m};
+DROP TABLE t1;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m};
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+Variable_name Value
+rocksdb_update_cf_options
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
index 5ad5394db29..ba24fafd0ec 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
@@ -32,10 +32,19 @@ SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
NULL
-SET @@global.rocksdb_update_cf_options = 'aaaaa';
+SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
NULL
+SET @@global.rocksdb_update_cf_options = '';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+
+SET @@global.rocksdb_update_cf_options = 'aaaaa';;
+ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'aaaaa'
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
CF_NAME OPTION_TYPE VALUE
default WRITE_BUFFER_SIZE 67108864
@@ -100,7 +109,12 @@ cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
CF_NAME OPTION_TYPE VALUE
cf1 TARGET_FILE_SIZE_BASE 25165824
-SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';;
+ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'default={foo=bar};'
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
+SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
NULL
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
index 7da628b73fd..f432f1f7750 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result
@@ -1,7 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
SET @start_global_value = @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
SELECT @start_global_value;
@start_global_value
0
-"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 444;
-ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a read only variable
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 100"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 100;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 1"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 1;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 0"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 0;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@session.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_WAL_BYTES_PER_SYNC = 444;
+ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'aaa'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'bbb'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '-1'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '101'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '484436'"
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = @start_global_value;
+SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC;
+@@global.ROCKSDB_WAL_BYTES_PER_SYNC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test
new file mode 100644
index 00000000000..64fb2458424
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
index d1d6b2b5695..bf78f578b6c 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test
@@ -1,7 +1,22 @@
--source include/have_rocksdb.inc
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
--let $sys_var=ROCKSDB_BYTES_PER_SYNC
---let $read_only=1
+--let $read_only=0
--let $session=0
--source include/rocksdb_sys_var.inc
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test
deleted file mode 100644
index 574375cd1ea..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test
+++ /dev/null
@@ -1,46 +0,0 @@
---source include/have_rocksdb.inc
-
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-##
-## test cardinality for analyze statements after flushing table
-##
-
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
---sorted_result
-SELECT * FROM t1;
-
-set session rocksdb_flush_memtable_on_analyze=off;
-ANALYZE TABLE t1;
-SHOW INDEXES FROM t1;
-
-set session rocksdb_flush_memtable_on_analyze=on;
-ANALYZE TABLE t1;
-SHOW INDEXES FROM t1;
-DROP TABLE t1;
-
-##
-## test data length for show table status statements for tables with few rows
-##
-
-CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-INSERT INTO t1 (b) VALUES (1);
-INSERT INTO t1 (b) VALUES (2);
-INSERT INTO t1 (b) VALUES (3);
---sorted_result
-SELECT * FROM t1;
-
---replace_column 5 # 6 # 7 #
-SHOW TABLE STATUS LIKE 't1';
-ANALYZE TABLE t1;
---replace_column 5 # 6 # 7 #
-SHOW TABLE STATUS LIKE 't1';
-
-DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test
new file mode 100644
index 00000000000..f10ff2c6123
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_IGNORE_UNKNOWN_OPTIONS
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
index ba3293264ab..36996761507 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test
@@ -1,6 +1,8 @@
--source include/have_rocksdb.inc
---let $sys_var=ROCKSDB_MAX_OPEN_FILES
---let $read_only=1
---let $session=0
---source include/rocksdb_sys_var.inc
+# We can not use rocksdb_sys_var.inc here as this is a global, read only option
+# whose value is dependent on the servers open_files_limit. It is more fully
+# tested in the rocksdb.max_open_files test.
+
+--replace_column 2 #
+show variables like 'rocksdb_max_open_files';
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test
index 451653fe769..43579faba82 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test
@@ -7,7 +7,7 @@ INSERT INTO valid_values VALUES(1024);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
---let $sys_var=ROCKSDB_CONCURRENT_PREPARE
+--let $sys_var=ROCKSDB_TWO_WRITE_QUEUES
--let $read_only=1
--let $session=0
--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test
new file mode 100644
index 00000000000..03626260cab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS="";
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+DROP TABLE t1;
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};';
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT;
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options';
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
index 0e675dafed3..533b2db8204 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
@@ -39,8 +39,17 @@ SELECT @@global.rocksdb_update_cf_options;
SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
+# Make sure that we do not double free the NULL string
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+
+# Attempt setting an empty string
+SET @@global.rocksdb_update_cf_options = '';
+SELECT @@global.rocksdb_update_cf_options;
+
# Will fail to parse. Value not updated.
-SET @@global.rocksdb_update_cf_options = 'aaaaa';
+--Error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.rocksdb_update_cf_options = 'aaaaa';
SELECT @@global.rocksdb_update_cf_options;
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
@@ -87,7 +96,11 @@ SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FIL
# Will fail to parse. No valid assignments included. Value not updated and
# reset to NULL.
-SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+--Error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
USE test;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
index afab0f20d40..9c2a1f4f391 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test
@@ -1,6 +1,22 @@
--source include/have_rocksdb.inc
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
--let $sys_var=ROCKSDB_WAL_BYTES_PER_SYNC
---let $read_only=1
+--let $read_only=0
--let $session=0
--source include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc
index 368e6d88d1c..92396b23f7e 100644
--- a/storage/rocksdb/properties_collector.cc
+++ b/storage/rocksdb/properties_collector.cc
@@ -54,17 +54,9 @@ Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
: m_cf_id(cf_id), m_ddl_manager(ddl_manager), m_last_stats(nullptr),
m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l),
m_file_size(0), m_params(params),
- m_table_stats_sampling_pct(table_stats_sampling_pct),
- m_seed(time(nullptr)), m_card_adj_extra(1.) {
+ m_cardinality_collector(table_stats_sampling_pct) {
DBUG_ASSERT(ddl_manager != nullptr);
- // We need to adjust the index cardinality numbers based on the sampling
- // rate so that the output of "SHOW INDEX" command will reflect reality
- // more closely. It will still be an approximation, just a better one.
- if (m_table_stats_sampling_pct > 0) {
- m_card_adj_extra = 100. / m_table_stats_sampling_pct;
- }
-
m_deleted_rows_window.resize(m_params.m_window, false);
}
@@ -147,7 +139,7 @@ Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) {
m_last_stats->m_name = m_keydef->get_name();
}
}
- m_last_key.clear();
+ m_cardinality_collector.Reset();
}
return m_last_stats;
@@ -157,7 +149,7 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key,
const rocksdb::Slice &value,
const rocksdb::EntryType &type,
const uint64_t &file_size) {
- const auto stats = AccessStats(key);
+ auto stats = AccessStats(key);
stats->m_data_size += key.size() + value.size();
@@ -183,38 +175,15 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key,
sql_print_error("RocksDB: Unexpected entry type found: %u. "
"This should not happen so aborting the system.",
type);
- abort_with_stack_traces();
+ abort();
break;
}
stats->m_actual_disk_size += file_size - m_file_size;
m_file_size = file_size;
- if (m_keydef != nullptr && ShouldCollectStats()) {
- std::size_t column = 0;
- bool new_key = true;
-
- if (!m_last_key.empty()) {
- rocksdb::Slice last(m_last_key.data(), m_last_key.size());
- new_key = (m_keydef->compare_keys(&last, &key, &column) == 0);
- }
-
- if (new_key) {
- DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size());
-
- for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) {
- stats->m_distinct_keys_per_prefix[i]++;
- }
-
- // assign new last_key for the next call
- // however, we only need to change the last key
- // if one of the first n-1 columns is different
- // If the n-1 prefix is the same, no sense in storing
- // the new key
- if (column < stats->m_distinct_keys_per_prefix.size()) {
- m_last_key.assign(key.data(), key.size());
- }
- }
+ if (m_keydef != nullptr) {
+ m_cardinality_collector.ProcessKey(key, m_keydef.get(), stats);
}
}
@@ -261,8 +230,10 @@ Rdb_tbl_prop_coll::Finish(rocksdb::UserCollectedProperties *const properties) {
rocksdb_num_sst_entry_other += num_sst_entry_other;
}
- properties->insert({INDEXSTATS_KEY,
- Rdb_index_stats::materialize(m_stats, m_card_adj_extra)});
+ for (Rdb_index_stats &stat : m_stats) {
+ m_cardinality_collector.AdjustStats(&stat);
+ }
+ properties->insert({INDEXSTATS_KEY, Rdb_index_stats::materialize(m_stats)});
return rocksdb::Status::OK();
}
@@ -272,23 +243,6 @@ bool Rdb_tbl_prop_coll::NeedCompact() const {
(m_max_deleted_rows > m_params.m_deletes);
}
-bool Rdb_tbl_prop_coll::ShouldCollectStats() {
- // Zero means that we'll use all the keys to update statistics.
- if (!m_table_stats_sampling_pct ||
- RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct) {
- return true;
- }
-
- const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX -
- RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) +
- RDB_TBL_STATS_SAMPLE_PCT_MIN;
-
- DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN);
- DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX);
-
- return val <= m_table_stats_sampling_pct;
-}
-
/*
Returns the same as above, but in human-readable way for logging
*/
@@ -365,8 +319,7 @@ void Rdb_tbl_prop_coll::read_stats_from_tbl_props(
Serializes an array of Rdb_index_stats into a network string.
*/
std::string
-Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats,
- const float card_adj_extra) {
+Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats) {
String ret;
rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES);
for (const auto &i : stats) {
@@ -382,8 +335,7 @@ Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats,
rdb_netstr_append_uint64(&ret, i.m_entry_merges);
rdb_netstr_append_uint64(&ret, i.m_entry_others);
for (const auto &num_keys : i.m_distinct_keys_per_prefix) {
- const float upd_num_keys = num_keys * card_adj_extra;
- rdb_netstr_append_uint64(&ret, static_cast<int64_t>(upd_num_keys));
+ rdb_netstr_append_uint64(&ret, num_keys);
}
}
@@ -416,7 +368,7 @@ int Rdb_index_stats::unmaterialize(const std::string &s,
sql_print_error("Index stats version %d was outside of supported range. "
"This should not happen so aborting the system.",
version);
- abort_with_stack_traces();
+ abort();
}
size_t needed = sizeof(stats.m_gl_index_id.cf_id) +
@@ -521,4 +473,75 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment,
}
}
+// Builds a cardinality collector that samples keys at the given percentage.
+// The state handed to rand_r() for sampling decisions is seeded from the
+// current wall-clock time.
+Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct)
+    : m_table_stats_sampling_pct(table_stats_sampling_pct),
+      m_seed(time(nullptr)) {
+}
+
+// Tells whether every key is used for statistics, i.e. no sampling happens.
+// That is the case when the configured percentage is zero or equal to
+// RDB_TBL_STATS_SAMPLE_PCT_MAX.
+bool Rdb_tbl_card_coll::IsSampingDisabled() {
+  if (m_table_stats_sampling_pct == 0) {
+    // Zero means that we'll use all the keys to update statistics.
+    return true;
+  }
+  return RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct;
+}
+
+// Decides whether the current key takes part in cardinality statistics.
+// With sampling disabled every key is taken. Otherwise a pseudo-random value
+// in [RDB_TBL_STATS_SAMPLE_PCT_MIN, RDB_TBL_STATS_SAMPLE_PCT_MAX] is drawn and
+// the key is taken when the draw does not exceed the configured percentage.
+bool Rdb_tbl_card_coll::ShouldCollectStats() {
+  if (!IsSampingDisabled()) {
+    const auto span =
+        RDB_TBL_STATS_SAMPLE_PCT_MAX - RDB_TBL_STATS_SAMPLE_PCT_MIN + 1;
+    const int draw = rand_r(&m_seed) % span + RDB_TBL_STATS_SAMPLE_PCT_MIN;
+
+    DBUG_ASSERT(draw >= RDB_TBL_STATS_SAMPLE_PCT_MIN);
+    DBUG_ASSERT(draw <= RDB_TBL_STATS_SAMPLE_PCT_MAX);
+
+    return draw <= m_table_stats_sampling_pct;
+  }
+
+  return true; // collect every key
+}
+
+// Feeds one key into the cardinality counters of `stats`.
+//
+// The key is skipped when the sampler rejects it. Otherwise it is compared
+// with the last remembered key (if any) through keydef->compare_keys(), which
+// also writes a column position into `first_diff`. When that call returns
+// zero (or no key has been remembered yet), every per-prefix counter from
+// that position onwards is incremented.
+void Rdb_tbl_card_coll::ProcessKey(const rocksdb::Slice &key,
+                                   const Rdb_key_def *keydef,
+                                   Rdb_index_stats *stats) {
+  if (!ShouldCollectStats()) {
+    return;
+  }
+
+  std::size_t first_diff = 0;
+
+  if (!m_last_key.empty()) {
+    rocksdb::Slice previous(m_last_key.data(), m_last_key.size());
+    if (keydef->compare_keys(&previous, &key, &first_diff) != 0) {
+      // Not treated as a new key: leave counters and remembered key as is.
+      return;
+    }
+  }
+
+  auto &counters = stats->m_distinct_keys_per_prefix;
+  DBUG_ASSERT(first_diff <= counters.size());
+
+  for (std::size_t pos = first_diff; pos < counters.size(); pos++) {
+    counters[pos]++;
+  }
+
+  // Remember this key for the next call, but only when the difference is
+  // inside the tracked prefixes. If all tracked prefixes are the same there
+  // is no sense in storing the new key.
+  if (first_diff < counters.size()) {
+    m_last_key.assign(key.data(), key.size());
+  }
+}
+
+// Forgets the last remembered key so that the next ProcessKey() call starts
+// without a previous key to compare against.
+void Rdb_tbl_card_coll::Reset() {
+  m_last_key.clear();
+}
+
+// Index cardinality numbers gathered from a sample have to be scaled by the
+// sampling rate so that the output of the "SHOW INDEX" command reflects
+// reality more closely. The result is still an approximation, just a better
+// one. When no sampling was done the stats are returned unchanged.
+void Rdb_tbl_card_coll::AdjustStats(Rdb_index_stats *stats) {
+  if (!IsSampingDisabled()) {
+    for (int64_t &distinct : stats->m_distinct_keys_per_prefix) {
+      distinct = distinct * 100 / m_table_stats_sampling_pct;
+    }
+  }
+}
+
} // namespace myrocks
diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h
index 9ae519d95c7..1441d893420 100644
--- a/storage/rocksdb/properties_collector.h
+++ b/storage/rocksdb/properties_collector.h
@@ -56,8 +56,7 @@ struct Rdb_index_stats {
std::vector<int64_t> m_distinct_keys_per_prefix;
std::string m_name; // name is not persisted
- static std::string materialize(const std::vector<Rdb_index_stats> &stats,
- const float card_adj_extra);
+ static std::string materialize(const std::vector<Rdb_index_stats> &stats);
static int unmaterialize(const std::string &s,
std::vector<Rdb_index_stats> *const ret);
@@ -71,6 +70,40 @@ struct Rdb_index_stats {
const int64_t &estimated_data_len = 0);
};
+// The helper class to calculate index cardinality
+class Rdb_tbl_card_coll {
+ public:
+  explicit Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct);
+
+ public:
+  void ProcessKey(const rocksdb::Slice &key, const Rdb_key_def *keydef,
+                  Rdb_index_stats *stats);
+  /*
+   * Resets the state of the collector to start calculating statistics for
+   * the next index.
+   */
+  void Reset();
+
+  /*
+   * Cardinality statistics might be calculated using some sampling strategy.
+   * This method adjusts gathered statistics according to the sampling
+   * strategy used. Note that the adjusted cardinality value is just an
+   * estimate and can exceed the number of rows in a table, so the
+   * returned value should be capped by the row count before it is used by
+   * an optimizer or displayed to a client.
+   */
+  void AdjustStats(Rdb_index_stats *stats);
+
+ private:
+  bool ShouldCollectStats();
+  bool IsSampingDisabled();
+
+ private:
+  std::string m_last_key;
+  uint8_t m_table_stats_sampling_pct;
+  unsigned int m_seed;
+};
+
class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector {
public:
Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
@@ -130,9 +163,7 @@ private:
uint64_t m_rows, m_window_pos, m_deleted_rows, m_max_deleted_rows;
uint64_t m_file_size;
Rdb_compact_params m_params;
- uint8_t m_table_stats_sampling_pct;
- unsigned int m_seed;
- float m_card_adj_extra;
+ Rdb_tbl_card_coll m_cardinality_collector;
};
class Rdb_tbl_prop_coll_factory
diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc
index e608580c666..4d97ab8058d 100644
--- a/storage/rocksdb/rdb_cf_options.cc
+++ b/storage/rocksdb/rdb_cf_options.cc
@@ -325,6 +325,13 @@ Rdb_cf_options::get_cf_comparator(const std::string &cf_name) {
}
}
+// Picks the merge operator for a column family: the column family named
+// DEFAULT_SYSTEM_CF_NAME gets a fresh Rdb_system_merge_op, every other
+// column family gets none (nullptr).
+std::shared_ptr<rocksdb::MergeOperator>
+Rdb_cf_options::get_cf_merge_operator(const std::string &cf_name) {
+  if (cf_name == DEFAULT_SYSTEM_CF_NAME) {
+    return std::make_shared<Rdb_system_merge_op>();
+  }
+  return nullptr;
+}
+
void Rdb_cf_options::get_cf_options(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts) {
DBUG_ASSERT(opts != nullptr);
@@ -334,6 +341,7 @@ void Rdb_cf_options::get_cf_options(const std::string &cf_name,
// Set the comparator according to 'rev:'
opts->comparator = get_cf_comparator(cf_name);
+ opts->merge_operator = get_cf_merge_operator(cf_name);
}
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h
index 32f2308284f..19e5da6a79e 100644
--- a/storage/rocksdb/rdb_cf_options.h
+++ b/storage/rocksdb/rdb_cf_options.h
@@ -64,6 +64,9 @@ public:
static const rocksdb::Comparator *
get_cf_comparator(const std::string &cf_name);
+ std::shared_ptr<rocksdb::MergeOperator>
+ get_cf_merge_operator(const std::string &cf_name);
+
void get_cf_options(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts)
MY_ATTRIBUTE((__nonnull__));
diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h
index 9bc7ece6e7a..20ae3c740c1 100644
--- a/storage/rocksdb/rdb_compact_filter.h
+++ b/storage/rocksdb/rdb_compact_filter.h
@@ -167,7 +167,7 @@ public:
sql_print_error("Decoding ttl from PK value failed in compaction filter, "
"for index (%u,%u), val: %s",
m_prev_index.cf_id, m_prev_index.index_id, buf.c_str());
- abort_with_stack_traces();
+ abort();
}
/*
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index b2f5af705a3..ebda73d0870 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -32,6 +32,7 @@
#include <limits>
#include <map>
#include <set>
+#include <string>
#include <utility>
#include <vector>
@@ -826,6 +827,25 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) {
return changed;
}
+/*
+  Decrements the packed tuple in place, treating its bytes as a number with
+  the most significant byte first. The very first byte is never modified.
+
+  @return Number of bytes that were changed
+*/
+int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint &len) {
+  DBUG_ASSERT(packed_tuple != nullptr);
+
+  int touched = 0;
+  // Visit bytes len-1 down to 1; byte 0 stays as it is.
+  for (uint pos = len; pos > 1; pos--) {
+    uchar &cur = packed_tuple[pos - 1];
+    touched++;
+    if (cur != uchar(0x00)) {
+      // No borrow needed: decrement this byte and stop.
+      cur = cur - 1;
+      return touched;
+    }
+    // 0x00 wraps to 0xFF and the borrow moves on to the byte on the left.
+    cur = 0xFF;
+  }
+  return touched;
+}
+
static const std::map<char, size_t> UNPACK_HEADER_SIZES = {
{RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE},
{RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}};
@@ -1429,11 +1449,11 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
MY_BITMAP covered_bitmap;
my_bitmap_map covered_bits;
uint curr_bitmap_pos = 0;
- bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
const bool has_covered_bitmap =
has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG);
if (has_covered_bitmap) {
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
RDB_UNPACK_COVERED_DATA_LEN_SIZE);
@@ -1508,6 +1528,18 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
}
if ((this->*fpi->m_skip_func)(fpi, field, &reader))
return HA_ERR_ROCKSDB_CORRUPT_DATA;
+
+ // If this is a space padded varchar, we need to skip the indicator
+ // bytes for trailing bytes. They're useless since we can't restore the
+ // field anyway.
+ //
+ // There is a special case for prefixed varchars where we do not
+ // generate unpack info, because we know prefixed varchars cannot be
+ // unpacked. In this case, it is not necessary to skip.
+ if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad &&
+ !fpi->m_unpack_info_stores_value) {
+ unp_reader.read(fpi->m_unpack_info_uses_two_bytes ? 2 : 1);
+ }
}
}
@@ -3487,6 +3519,20 @@ void Rdb_tbl_def::set_name(const std::string &name) {
check_if_is_mysql_system_table();
}
+// Returns the global index id of the first key descriptor that is a primary
+// key or a hidden primary key. Aborts the process when the table has no such
+// key.
+GL_INDEX_ID Rdb_tbl_def::get_autoincr_gl_index_id() {
+  for (uint pos = 0; pos < m_key_count; pos++) {
+    auto &key_descr = m_key_descr_arr[pos];
+    const bool is_pk =
+        key_descr->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY ||
+        key_descr->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY;
+    if (!is_pk) {
+      continue;
+    }
+    return key_descr->get_gl_index_id();
+  }
+
+  // Every table must have a primary key, even if it's hidden.
+  abort();
+  return GL_INDEX_ID();
+}
+
/*
Static function of type my_hash_get_key that gets invoked by
the m_ddl_hash object of type my_core::HASH.
@@ -3714,6 +3760,68 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir,
}
/*
+ Validate that all auto increment values in the data dictionary are on a
+ supported version.
+*/
+bool Rdb_ddl_manager::validate_auto_incr() {
+  // All AUTO_INC dictionary keys begin with this index-number prefix.
+  uchar prefix[Rdb_key_def::INDEX_NUMBER_SIZE];
+  rdb_netbuf_store_index(prefix, Rdb_key_def::AUTO_INC);
+  const rocksdb::Slice prefix_slice(reinterpret_cast<char *>(prefix),
+                                    Rdb_key_def::INDEX_NUMBER_SIZE);
+
+  std::unique_ptr<rocksdb::Iterator> iter(m_dict->new_iterator());
+
+  for (iter->Seek(prefix_slice); iter->Valid(); iter->Next()) {
+    const rocksdb::Slice entry_key = iter->key();
+    const rocksdb::Slice entry_val = iter->value();
+
+    // Stop at the first key that no longer carries the AUTO_INC prefix.
+    if (entry_key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE &&
+        memcmp(entry_key.data(), prefix, Rdb_key_def::INDEX_NUMBER_SIZE) !=
+            0) {
+      break;
+    }
+
+    // The key must be exactly three index numbers long and the value must
+    // hold more than just the version field.
+    if (entry_key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 ||
+        entry_val.size() <= Rdb_key_def::VERSION_SIZE) {
+      return false;
+    }
+
+    // Check if we have orphaned entries for whatever reason by cross
+    // referencing ddl entries.
+    GL_INDEX_ID index_id;
+    auto cursor = reinterpret_cast<const uchar *>(entry_key.data());
+    cursor += Rdb_key_def::INDEX_NUMBER_SIZE;
+    rdb_netbuf_read_gl_index(&cursor, &index_id);
+    if (!m_dict->get_index_info(index_id, nullptr)) {
+      // NO_LINT_DEBUG
+      sql_print_warning("RocksDB: AUTOINC mismatch - "
+                        "Index number (%u, %u) found in AUTOINC "
+                        "but does not exist as a DDL entry",
+                        index_id.cf_id, index_id.index_id);
+      return false;
+    }
+
+    // The value starts with a version that must not be newer than the one
+    // this build understands.
+    cursor = reinterpret_cast<const uchar *>(entry_val.data());
+    const int stored_version = rdb_netbuf_read_uint16(&cursor);
+    if (stored_version > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+      // NO_LINT_DEBUG
+      sql_print_warning("RocksDB: AUTOINC mismatch - "
+                        "Index number (%u, %u) found in AUTOINC "
+                        "is on unsupported version %d",
+                        index_id.cf_id, index_id.index_id, stored_version);
+      return false;
+    }
+  }
+
+  // An iterator error counts as a validation failure.
+  return iter->status().ok();
+}
+
+/*
Validate that all the tables in the RocksDB database dictionary match the .frm
files in the datadir
*/
@@ -3877,10 +3985,18 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
If validate_tables is greater than 0 run the validation. Only fail the
initialzation if the setting is 1. If the setting is 2 we continue.
*/
- if (validate_tables > 0 && !validate_schemas()) {
- if (validate_tables == 1) {
- sql_print_error("RocksDB: Problems validating data dictionary "
- "against .frm files, exiting");
+ if (validate_tables > 0) {
+ std::string msg;
+ if (!validate_schemas()) {
+ msg = "RocksDB: Problems validating data dictionary "
+ "against .frm files, exiting";
+ } else if (!validate_auto_incr()) {
+ msg = "RocksDB: Problems validating auto increment values in "
+ "data dictionary, exiting";
+ }
+ if (validate_tables == 1 && !msg.empty()) {
+ // NO_LINT_DEBUG
+ sql_print_error("%s", msg.c_str());
return true;
}
}
@@ -4154,6 +4270,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
new_rec->m_auto_incr_val =
rec->m_auto_incr_val.load(std::memory_order_relaxed);
new_rec->m_key_descr_arr = rec->m_key_descr_arr;
+
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
@@ -4613,13 +4730,16 @@ void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch,
const GL_INDEX_ID &gl_index_id) const {
delete_with_prefix(batch, Rdb_key_def::INDEX_INFO, gl_index_id);
delete_with_prefix(batch, Rdb_key_def::INDEX_STATISTICS, gl_index_id);
+ delete_with_prefix(batch, Rdb_key_def::AUTO_INC, gl_index_id);
}
bool Rdb_dict_manager::get_index_info(
const GL_INDEX_ID &gl_index_id,
struct Rdb_index_info *const index_info) const {
- index_info->m_gl_index_id = gl_index_id;
+ if (index_info) {
+ index_info->m_gl_index_id = gl_index_id;
+ }
bool found = false;
bool error = false;
@@ -4630,6 +4750,10 @@ bool Rdb_dict_manager::get_index_info(
const rocksdb::Status &status = get_value(key, &value);
if (status.ok()) {
+ if (!index_info) {
+ return true;
+ }
+
const uchar *const val = (const uchar *)value.c_str();
const uchar *ptr = val;
index_info->m_index_dict_version = rdb_netbuf_to_uint16(val);
@@ -4668,6 +4792,11 @@ bool Rdb_dict_manager::get_index_info(
index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
ptr += RDB_SIZEOF_KV_VERSION;
index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ if ((index_info->m_kv_version ==
+ Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) &&
+ index_info->m_ttl_duration > 0) {
+ index_info->m_index_flags = Rdb_key_def::TTL_FLAG;
+ }
found = true;
break;
@@ -4709,7 +4838,7 @@ bool Rdb_dict_manager::get_index_info(
"and it may be a bug.",
index_info->m_index_dict_version, index_info->m_index_type,
index_info->m_kv_version, index_info->m_ttl_duration);
- abort_with_stack_traces();
+ abort();
}
return found;
@@ -4972,7 +5101,7 @@ void Rdb_dict_manager::resume_drop_indexes() const {
"bug.",
max_index_id_in_dict, gl_index_id.cf_id,
gl_index_id.index_id);
- abort_with_stack_traces();
+ abort();
}
}
}
@@ -5021,7 +5150,7 @@ void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id,
"from index id (%u,%u). MyRocks data dictionary may "
"get corrupted.",
gl_index_id.cf_id, gl_index_id.index_id);
- abort_with_stack_traces();
+ abort();
}
}
}
@@ -5079,7 +5208,7 @@ void Rdb_dict_manager::add_stats(
// IndexStats::materialize takes complete care of serialization including
// storing the version
const auto value =
- Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it}, 1.);
+ Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it});
batch->Put(m_system_cfh, rocksdb::Slice((char *)key_buf, sizeof(key_buf)),
value);
@@ -5105,6 +5234,53 @@ Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const {
return Rdb_index_stats();
}
+// Queues a write of an auto-increment value for the given index into `batch`.
+//
+// The key is built by dump_index_id() with the AUTO_INC tag; the value is a
+// 16-bit version followed by the 64-bit counter. With `overwrite` set the
+// entry is written with Put(), otherwise with Merge().
+rocksdb::Status
+Rdb_dict_manager::put_auto_incr_val(rocksdb::WriteBatchBase *batch,
+                                    const GL_INDEX_ID &gl_index_id,
+                                    ulonglong val, bool overwrite) const {
+  uchar key_bytes[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
+  dump_index_id(key_bytes, Rdb_key_def::AUTO_INC, gl_index_id);
+  const rocksdb::Slice key(reinterpret_cast<char *>(key_bytes),
+                           sizeof(key_bytes));
+
+  // Value is constructed by storing the version and the value.
+  uchar value_bytes[RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+                    ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0};
+  rdb_netbuf_store_uint16(value_bytes, Rdb_key_def::AUTO_INCREMENT_VERSION);
+  rdb_netbuf_store_uint64(value_bytes + RDB_SIZEOF_AUTO_INCREMENT_VERSION,
+                          val);
+  // Both fields together fill the whole buffer.
+  const rocksdb::Slice value(reinterpret_cast<char *>(value_bytes),
+                             sizeof(value_bytes));
+
+  return overwrite ? batch->Put(m_system_cfh, key, value)
+                   : batch->Merge(m_system_cfh, key, value);
+}
+
+// Reads the persisted auto-increment value for the given index.
+//
+// @param gl_index_id  index whose AUTO_INC dictionary entry is looked up
+// @param new_val      receives the stored counter; written only on success
+// @return true when an entry was found, is large enough to hold a version
+//         plus counter, and its version is not newer than
+//         Rdb_key_def::AUTO_INCREMENT_VERSION; false otherwise
+bool Rdb_dict_manager::get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
+                                         ulonglong *new_val) const {
+  uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
+  dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id);
+
+  std::string value;
+  const rocksdb::Status status = get_value(
+      rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)),
+      &value);
+
+  if (!status.ok()) {
+    return false;
+  }
+
+  // A truncated or corrupt entry must not be decoded: both reads below would
+  // otherwise run past the end of the string's buffer.
+  if (value.size() <
+      RDB_SIZEOF_AUTO_INCREMENT_VERSION + ROCKSDB_SIZEOF_AUTOINC_VALUE) {
+    return false;
+  }
+
+  const uchar *const val = reinterpret_cast<const uchar *>(value.data());
+
+  if (rdb_netbuf_to_uint16(val) <= Rdb_key_def::AUTO_INCREMENT_VERSION) {
+    *new_val = rdb_netbuf_to_uint64(val + RDB_SIZEOF_AUTO_INCREMENT_VERSION);
+    return true;
+  }
+  return false;
+}
+
uint Rdb_seq_generator::get_and_update_next_number(
Rdb_dict_manager *const dict) {
DBUG_ASSERT(dict != nullptr);
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 223f61edb43..326570c433d 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -138,6 +138,7 @@ const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16);
const size_t RDB_SIZEOF_INDEX_TYPE = sizeof(uchar);
const size_t RDB_SIZEOF_KV_VERSION = sizeof(uint16);
const size_t RDB_SIZEOF_INDEX_FLAGS = sizeof(uint32);
+const size_t RDB_SIZEOF_AUTO_INCREMENT_VERSION = sizeof(uint16);
// Possible return values for rdb_index_field_unpack_t functions.
enum {
@@ -237,17 +238,28 @@ public:
*size = INDEX_NUMBER_SIZE;
}
+  /* Get the first key that you need to position at to start iterating.
+     Fills in the "supremum" key when this index lives in a reverse column
+     family and the "infimum" key otherwise.
+  */
+  inline void get_first_key(uchar *const key, uint *const size) const {
+    if (m_is_reverse_cf) {
+      get_supremum_key(key, size);
+    } else {
+      get_infimum_key(key, size);
+    }
+  }
+
/* Make a key that is right after the given key. */
static int successor(uchar *const packed_tuple, const uint &len);
+ /* Make a key that is right before the given key. */
+ static int predecessor(uchar *const packed_tuple, const uint &len);
+
/*
This can be used to compare prefixes.
if X is a prefix of Y, then we consider that X = Y.
*/
// b describes the lookup key, which can be a prefix of a.
+ // b might be outside of the index_number range, if successor() is called.
int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const {
DBUG_ASSERT(covers_key(a));
- DBUG_ASSERT(covers_key(b));
return memcmp(a.data(), b.data(), std::min(a.size(), b.size()));
}
@@ -383,6 +395,7 @@ public:
INDEX_STATISTICS = 6,
MAX_INDEX_ID = 7,
DDL_CREATE_INDEX_ONGOING = 8,
+ AUTO_INC = 9,
END_DICT_INDEX_ID = 255
};
@@ -395,6 +408,7 @@ public:
DDL_DROP_INDEX_ONGOING_VERSION = 1,
MAX_INDEX_ID_VERSION = 1,
DDL_CREATE_INDEX_ONGOING_VERSION = 1,
+ AUTO_INCREMENT_VERSION = 1,
// Version for index stats is stored in IndexStats struct
};
@@ -968,17 +982,17 @@ public:
Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete;
explicit Rdb_tbl_def(const std::string &name)
- : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(name);
}
Rdb_tbl_def(const char *const name, const size_t &len)
- : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(std::string(name, len));
}
explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0)
- : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
+ : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(std::string(slice.data() + pos, slice.size() - pos));
}
@@ -991,7 +1005,7 @@ public:
std::shared_ptr<Rdb_key_def> *m_key_descr_arr;
std::atomic<longlong> m_hidden_pk_val;
- std::atomic<longlong> m_auto_incr_val;
+ std::atomic<ulonglong> m_auto_incr_val;
/* Is this a system table */
bool m_is_mysql_system_table;
@@ -1003,6 +1017,7 @@ public:
const std::string &base_dbname() const { return m_dbname; }
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
+ GL_INDEX_ID get_autoincr_gl_index_id();
};
/*
@@ -1115,6 +1130,8 @@ private:
static void free_hash_elem(void *const data);
bool validate_schemas();
+
+ bool validate_auto_incr();
};
/*
@@ -1179,8 +1196,9 @@ private:
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
- value: version, index_type, kv_format_version, ttl_duration
+ value: version, index_type, kv_format_version, index_flags, ttl_duration
index_type is 1 byte, version and kv_format_version are 2 bytes.
+ index_flags is 4 bytes.
ttl_duration is 8 bytes.
3. CF id => CF flags
@@ -1209,6 +1227,11 @@ private:
key: Rdb_key_def::DDL_CREATE_INDEX_ONGOING(0x8) + cf_id + index_id
value: version
+ 9. auto_increment values
+ key: Rdb_key_def::AUTO_INC(0x9) + cf_id + index_id
+ value: version, {max auto_increment so far}
+ max auto_increment is 8 bytes
+
Data dictionary operations are atomic inside RocksDB. For example,
when creating a table with two indexes, it is necessary to call Put
three times. They have to be atomic. Rdb_dict_manager has a wrapper function
@@ -1350,6 +1373,13 @@ public:
void add_stats(rocksdb::WriteBatch *const batch,
const std::vector<Rdb_index_stats> &stats) const;
Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const;
+
+ rocksdb::Status put_auto_incr_val(rocksdb::WriteBatchBase *batch,
+ const GL_INDEX_ID &gl_index_id,
+ ulonglong val,
+ bool overwrite = false) const;
+ bool get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
+ ulonglong *new_val) const;
};
struct Rdb_index_info {
@@ -1361,6 +1391,109 @@ struct Rdb_index_info {
uint64 m_ttl_duration = 0;
};
+/*
+  @brief
+  Merge Operator for the auto_increment value in the system_cf
+
+  @detail
+  This class implements the rocksdb Merge Operator for auto_increment values
+  that are stored to the data dictionary every transaction.
+
+  The actual Merge function is triggered on compaction, memtable flushes, or
+  when get() is called on the same key.
+
+  A merge keeps the larger of the existing and the incoming value, and the
+  result is always re-serialized with Rdb_key_def::AUTO_INCREMENT_VERSION.
+ */
+class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator {
+ public:
+  /*
+    Updates the new value associated with a key to be the maximum of the
+    passed in value and the existing value.
+
+    The key must be exactly three index numbers long and carry the AUTO_INC
+    key type; both values must have the exact auto_increment value size and
+    a version that is not newer than AUTO_INCREMENT_VERSION. Anything else
+    is treated as fatal and the process is aborted.
+
+    @param[IN]  key
+    @param[IN]  existing_value  existing value for a key; nullptr if
+                                nonexistent key
+    @param[IN]  value           incoming merge operand
+    @param[OUT] new_value       new value after Merge
+    @param[IN]  logger          not used by this implementation
+    @return always true (invalid input aborts instead of returning false)
+  */
+  bool Merge(const rocksdb::Slice &key, const rocksdb::Slice *existing_value,
+             const rocksdb::Slice &value, std::string *new_value,
+             rocksdb::Logger *logger) const override {
+    DBUG_ASSERT(new_value != nullptr);
+
+    if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 ||
+        GetKeyType(key) != Rdb_key_def::AUTO_INC ||
+        value.size() !=
+            RDB_SIZEOF_AUTO_INCREMENT_VERSION + ROCKSDB_SIZEOF_AUTOINC_VALUE ||
+        GetVersion(value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+      abort();
+    }
+
+    uint64_t merged_value = Deserialize(value);
+
+    if (existing_value != nullptr) {
+      if (existing_value->size() != RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+                                        ROCKSDB_SIZEOF_AUTOINC_VALUE ||
+          GetVersion(*existing_value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
+        abort();
+      }
+
+      /* Never let a merge move the stored value backwards */
+      merged_value = std::max(merged_value, Deserialize(*existing_value));
+    }
+    Serialize(merged_value, new_value);
+    return true;
+  }
+
+  /* Name under which this merge operator is identified */
+  virtual const char *Name() const override { return "Rdb_system_merge_op"; }
+
+ private:
+  /*
+    Serializes the integer data to the new_value buffer or the target buffer
+    the merge operator will update to.
+
+    Layout: 2-byte version (always AUTO_INCREMENT_VERSION) followed by the
+    8-byte value, both written with the rdb_netbuf_store_* helpers.
+  */
+  void Serialize(const uint64_t data, std::string *new_value) const {
+    uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+                    ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0};
+    uchar *ptr = value_buf;
+    /* fill in the auto increment version */
+    rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION);
+    ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION;
+    /* fill in the auto increment value */
+    rdb_netbuf_store_uint64(ptr, data);
+    ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE;
+    new_value->assign(reinterpret_cast<char *>(value_buf), ptr - value_buf);
+  }
+
+  /*
+    Gets the value of auto_increment type in the data dictionary from the
+    value slice; the value starts right after the version field.
+
+    @Note Only to be used on data dictionary keys for the auto_increment type.
+          The caller must have checked the slice size beforehand.
+  */
+  uint64_t Deserialize(const rocksdb::Slice &s) const {
+    return rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(s.data()) +
+                                RDB_SIZEOF_AUTO_INCREMENT_VERSION);
+  }
+
+  /*
+    Gets the type of the key in the data dictionary.
+
+    The type is read as a 32-bit number and returned at full width, so that
+    no high-order bits are dropped before it is compared with
+    Rdb_key_def::AUTO_INC.
+
+    @Note Only to be used on data dictionary keys for the auto_increment type
+  */
+  uint32_t GetKeyType(const rocksdb::Slice &s) const {
+    return rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(s.data()));
+  }
+
+  /*
+    Gets the version of the auto_increment value in the data dictionary.
+
+    @Note Only to be used on data dictionary value for the auto_increment type
+  */
+  uint16_t GetVersion(const rocksdb::Slice &s) const {
+    return rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(s.data()));
+  }
+};
+
bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs);
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
index f8ddcb00fb3..3ecd998dec5 100644
--- a/storage/rocksdb/rdb_i_s.cc
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -786,7 +786,7 @@ static int rdb_i_s_global_info_fill_table(
"from CF with id = %u. MyRocks data dictionary may "
"be corrupted.",
cf_handle->GetID());
- abort_with_stack_traces();
+ abort();
}
snprintf(cf_id_buf, INT_BUF_LEN, "%u", cf_handle->GetID());
@@ -917,7 +917,10 @@ enum {
INDEX_NUMBER,
INDEX_TYPE,
KV_FORMAT_VERSION,
- CF
+ TTL_DURATION,
+ INDEX_FLAGS,
+ CF,
+ AUTO_INCREMENT
};
} // namespace RDB_DDL_FIELD
@@ -932,7 +935,11 @@ static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = {
ROCKSDB_FIELD_INFO("INDEX_TYPE", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0),
ROCKSDB_FIELD_INFO("KV_FORMAT_VERSION", sizeof(uint16_t), MYSQL_TYPE_SHORT,
0),
+ ROCKSDB_FIELD_INFO("TTL_DURATION", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("INDEX_FLAGS", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0),
ROCKSDB_FIELD_INFO("CF", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("AUTO_INCREMENT", sizeof(uint64_t), MYSQL_TYPE_LONGLONG,
+ MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED),
ROCKSDB_FIELD_INFO_END};
int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
@@ -943,6 +950,7 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
DBUG_ASSERT(m_table != nullptr);
Field **field = m_table->field;
DBUG_ASSERT(field != nullptr);
+ const Rdb_dict_manager *dict_manager = rdb_get_dict_manager();
const std::string &dbname = tdef->base_dbname();
field[RDB_DDL_FIELD::TABLE_SCHEMA]->store(dbname.c_str(), dbname.size(),
@@ -973,10 +981,20 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
field[RDB_DDL_FIELD::INDEX_TYPE]->store(kd.m_index_type, true);
field[RDB_DDL_FIELD::KV_FORMAT_VERSION]->store(kd.m_kv_format_version,
true);
+ field[RDB_DDL_FIELD::TTL_DURATION]->store(kd.m_ttl_duration, true);
+ field[RDB_DDL_FIELD::INDEX_FLAGS]->store(kd.m_index_flags_bitmap, true);
std::string cf_name = kd.get_cf()->GetName();
field[RDB_DDL_FIELD::CF]->store(cf_name.c_str(), cf_name.size(),
system_charset_info);
+ ulonglong auto_incr;
+ if (dict_manager->get_auto_incr_val(tdef->get_autoincr_gl_index_id(),
+ &auto_incr)) {
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_notnull();
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->store(auto_incr, true);
+ } else {
+ field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_null();
+ }
ret = my_core::schema_table_store_record(m_thd, m_table);
if (ret)
@@ -1462,6 +1480,117 @@ static int rdb_i_s_trx_info_init(void *const p) {
DBUG_RETURN(0);
}
+/*
+  Support for INFORMATION_SCHEMA.ROCKSDB_DEADLOCK dynamic table
+ */
+namespace RDB_DEADLOCK_FIELD {
+/*
+  Column positions of the ROCKSDB_DEADLOCK table. The fill function uses
+  these enumerators as indexes into the table's field array.
+*/
+enum {
+  DEADLOCK_ID = 0,
+  TRANSACTION_ID,
+  CF_NAME,
+  WAITING_KEY,
+  LOCK_TYPE,
+  INDEX_NAME,
+  TABLE_NAME,
+  ROLLED_BACK
+};
+} // namespace RDB_DEADLOCK_FIELD
+
+/*
+  Column definitions of the ROCKSDB_DEADLOCK table. The entries are listed in
+  the same order as the RDB_DEADLOCK_FIELD enumerators, which are used as
+  field indexes when rows are filled in, so both lists have to be changed
+  together.
+*/
+static ST_FIELD_INFO rdb_i_s_deadlock_info_fields_info[] = {
+    ROCKSDB_FIELD_INFO("DEADLOCK_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+                       0),
+    ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+                       0),
+    ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+    ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0),
+    ROCKSDB_FIELD_INFO("LOCK_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+    ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+    ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+    ROCKSDB_FIELD_INFO("ROLLED_BACK", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
+                       0),
+    ROCKSDB_FIELD_INFO_END};
+
+/*
+  Fill the information_schema.rocksdb_deadlock virtual table.
+
+  Stores one row per transaction on the path of every deadlock returned by
+  rdb_get_deadlock_info(). All rows that belong to the same deadlock share
+  one DEADLOCK_ID, which is the zero-based position of that deadlock in the
+  returned list.
+
+  @param[IN] thd     session the rows are stored for
+  @param[IN] tables  table list whose `table` receives the rows
+  @param[IN] cond    unused
+  @return 0 on success, including the case where rdb_get_rocksdb_db()
+          returns nullptr; otherwise the non-zero result of the first
+          failing schema_table_store_record() call
+*/
+static int rdb_i_s_deadlock_info_fill_table(
+    my_core::THD *const thd, my_core::TABLE_LIST *const tables,
+    my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
+  DBUG_ENTER_FUNC();
+
+  DBUG_ASSERT(thd != nullptr);
+  DBUG_ASSERT(tables != nullptr);
+  DBUG_ASSERT(tables->table != nullptr);
+  DBUG_ASSERT(tables->table->field != nullptr);
+
+  static const std::string str_exclusive("EXCLUSIVE");
+  static const std::string str_shared("SHARED");
+
+  int ret = 0;
+  rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+  if (!rdb) {
+    DBUG_RETURN(ret);
+  }
+
+  const std::vector<Rdb_deadlock_info> &all_dl_info = rdb_get_deadlock_info();
+
+  ulonglong id = 0;
+  for (const auto &info : all_dl_info) {
+    for (const auto &trx_info : info.path) {
+      tables->table->field[RDB_DEADLOCK_FIELD::DEADLOCK_ID]->store(id, true);
+      tables->table->field[RDB_DEADLOCK_FIELD::TRANSACTION_ID]->store(
+          trx_info.trx_id, true);
+      tables->table->field[RDB_DEADLOCK_FIELD::CF_NAME]->store(
+          trx_info.cf_name.c_str(), trx_info.cf_name.length(),
+          system_charset_info);
+      tables->table->field[RDB_DEADLOCK_FIELD::WAITING_KEY]->store(
+          trx_info.waiting_key.c_str(), trx_info.waiting_key.length(),
+          system_charset_info);
+      if (trx_info.exclusive_lock) {
+        tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store(
+            str_exclusive.c_str(), str_exclusive.length(), system_charset_info);
+      } else {
+        tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store(
+            str_shared.c_str(), str_shared.length(), system_charset_info);
+      }
+      tables->table->field[RDB_DEADLOCK_FIELD::INDEX_NAME]->store(
+          trx_info.index_name.c_str(), trx_info.index_name.length(),
+          system_charset_info);
+      tables->table->field[RDB_DEADLOCK_FIELD::TABLE_NAME]->store(
+          trx_info.table_name.c_str(), trx_info.table_name.length(),
+          system_charset_info);
+      /* ROLLED_BACK is 1 only for the transaction chosen as the victim */
+      tables->table->field[RDB_DEADLOCK_FIELD::ROLLED_BACK]->store(
+          trx_info.trx_id == info.victim_trx_id, true);
+
+      /* Tell MySQL about this row in the virtual table */
+      ret = static_cast<int>(
+          my_core::schema_table_store_record(thd, tables->table));
+
+      if (ret != 0) {
+        /*
+          Stop at the first failure. Leaving only the inner loop would let
+          the remaining deadlocks store more rows and overwrite the error
+          code with a later success.
+        */
+        DBUG_RETURN(ret);
+      }
+    }
+    id++;
+  }
+
+  DBUG_RETURN(ret);
+}
+
+/*
+  Initialize the information_schema.rocksdb_deadlock virtual table by
+  registering its column definitions and its fill callback.
+
+  @param[IN] p  pointer to the my_core::ST_SCHEMA_TABLE to set up
+  @return always 0
+*/
+static int rdb_i_s_deadlock_info_init(void *const p) {
+  DBUG_ENTER_FUNC();
+
+  DBUG_ASSERT(p != nullptr);
+
+  auto *const schema_table = static_cast<my_core::ST_SCHEMA_TABLE *>(p);
+
+  schema_table->fill_table = rdb_i_s_deadlock_info_fill_table;
+  schema_table->fields_info = rdb_i_s_deadlock_info_fields_info;
+
+  DBUG_RETURN(0);
+}
+
static int rdb_i_s_deinit(void *p MY_ATTRIBUTE((__unused__))) {
DBUG_ENTER_FUNC();
DBUG_RETURN(0);
@@ -1645,4 +1774,20 @@ struct st_maria_plugin rdb_i_s_trx_info = {
nullptr, /* config options */
MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
+
+/*
+  Plugin descriptor for the INFORMATION_SCHEMA.ROCKSDB_DEADLOCK table.
+
+  It uses struct st_maria_plugin, the type under which rdb_i_s.h declares
+  this object and which the other descriptors in this file use; that layout
+  ends with the plugin maturity level rather than a flags field.
+*/
+struct st_maria_plugin rdb_i_s_deadlock_info = {
+    MYSQL_INFORMATION_SCHEMA_PLUGIN,
+    &rdb_i_s_info,
+    "ROCKSDB_DEADLOCK",
+    "Facebook",
+    "RocksDB transaction information",
+    PLUGIN_LICENSE_GPL,
+    rdb_i_s_deadlock_info_init,
+    nullptr,
+    0x0001,  /* version number (0.1) */
+    nullptr, /* status variables */
+    nullptr, /* system variables */
+    nullptr, /* config options */
+    MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
+};
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h
index 08d35e17ba9..d6a48bf3fec 100644
--- a/storage/rocksdb/rdb_i_s.h
+++ b/storage/rocksdb/rdb_i_s.h
@@ -32,4 +32,5 @@ extern struct st_maria_plugin rdb_i_s_ddl;
extern struct st_maria_plugin rdb_i_s_index_file_map;
extern struct st_maria_plugin rdb_i_s_lock_info;
extern struct st_maria_plugin rdb_i_s_trx_info;
+extern struct st_maria_plugin rdb_i_s_deadlock_info;
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_io_watchdog.cc b/storage/rocksdb/rdb_io_watchdog.cc
index 039b0d7baf1..f09efefcd2a 100644
--- a/storage/rocksdb/rdb_io_watchdog.cc
+++ b/storage/rocksdb/rdb_io_watchdog.cc
@@ -45,7 +45,7 @@ void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
"Shutting the service down.",
m_write_timeout);
- abort_with_stack_traces();
+ abort();
}
void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc
index d126d156314..0c561c62ab2 100644
--- a/storage/rocksdb/rdb_perf_context.cc
+++ b/storage/rocksdb/rdb_perf_context.cc
@@ -47,8 +47,13 @@ std::string rdb_pc_stat_types[] = {
"BLOCK_READ_TIME",
"BLOCK_CHECKSUM_TIME",
"BLOCK_DECOMPRESS_TIME",
+ "GET_READ_BYTES",
+ "MULTIGET_READ_BYTES",
+ "ITER_READ_BYTES",
"INTERNAL_KEY_SKIPPED_COUNT",
"INTERNAL_DELETE_SKIPPED_COUNT",
+ "INTERNAL_RECENT_SKIPPED_COUNT",
+ "INTERNAL_MERGE_COUNT",
"GET_SNAPSHOT_TIME",
"GET_FROM_MEMTABLE_TIME",
"GET_FROM_MEMTABLE_COUNT",
@@ -56,9 +61,12 @@ std::string rdb_pc_stat_types[] = {
"GET_FROM_OUTPUT_FILES_TIME",
"SEEK_ON_MEMTABLE_TIME",
"SEEK_ON_MEMTABLE_COUNT",
+ "NEXT_ON_MEMTABLE_COUNT",
+ "PREV_ON_MEMTABLE_COUNT",
"SEEK_CHILD_SEEK_TIME",
"SEEK_CHILD_SEEK_COUNT",
- "SEEK_IN_HEAP_TIME",
+ "SEEK_MIN_HEAP_TIME",
+ "SEEK_MAX_HEAP_TIME",
"SEEK_INTERNAL_SEEK_TIME",
"FIND_NEXT_USER_ENTRY_TIME",
"WRITE_WAL_TIME",
@@ -74,6 +82,12 @@ std::string rdb_pc_stat_types[] = {
"NEW_TABLE_ITERATOR_NANOS",
"BLOCK_SEEK_NANOS",
"FIND_TABLE_NANOS",
+ "BLOOM_MEMTABLE_HIT_COUNT",
+ "BLOOM_MEMTABLE_MISS_COUNT",
+ "BLOOM_SST_HIT_COUNT",
+ "BLOOM_SST_MISS_COUNT",
+ "KEY_LOCK_WAIT_TIME",
+ "KEY_LOCK_WAIT_COUNT",
"IO_THREAD_POOL_ID",
"IO_BYTES_WRITTEN",
"IO_BYTES_READ",
@@ -107,8 +121,13 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
IO_PERF_RECORD(block_read_time);
IO_PERF_RECORD(block_checksum_time);
IO_PERF_RECORD(block_decompress_time);
+ IO_PERF_RECORD(get_read_bytes);
+ IO_PERF_RECORD(multiget_read_bytes);
+ IO_PERF_RECORD(iter_read_bytes);
IO_PERF_RECORD(internal_key_skipped_count);
IO_PERF_RECORD(internal_delete_skipped_count);
+ IO_PERF_RECORD(internal_recent_skipped_count);
+ IO_PERF_RECORD(internal_merge_count);
IO_PERF_RECORD(get_snapshot_time);
IO_PERF_RECORD(get_from_memtable_time);
IO_PERF_RECORD(get_from_memtable_count);
@@ -116,9 +135,12 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
IO_PERF_RECORD(get_from_output_files_time);
IO_PERF_RECORD(seek_on_memtable_time);
IO_PERF_RECORD(seek_on_memtable_count);
+ IO_PERF_RECORD(next_on_memtable_count);
+ IO_PERF_RECORD(prev_on_memtable_count);
IO_PERF_RECORD(seek_child_seek_time);
IO_PERF_RECORD(seek_child_seek_count);
IO_PERF_RECORD(seek_min_heap_time);
+ IO_PERF_RECORD(seek_max_heap_time);
IO_PERF_RECORD(seek_internal_seek_time);
IO_PERF_RECORD(find_next_user_entry_time);
IO_PERF_RECORD(write_wal_time);
@@ -134,6 +156,13 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
IO_PERF_RECORD(new_table_iterator_nanos);
IO_PERF_RECORD(block_seek_nanos);
IO_PERF_RECORD(find_table_nanos);
+ IO_PERF_RECORD(bloom_memtable_hit_count);
+ IO_PERF_RECORD(bloom_memtable_miss_count);
+ IO_PERF_RECORD(bloom_sst_hit_count);
+ IO_PERF_RECORD(bloom_sst_miss_count);
+ IO_PERF_RECORD(key_lock_wait_time);
+ IO_PERF_RECORD(key_lock_wait_count);
+
IO_STAT_RECORD(thread_pool_id);
IO_STAT_RECORD(bytes_written);
IO_STAT_RECORD(bytes_read);
diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h
index f9b9fd48d3e..2aca3dc3bfd 100644
--- a/storage/rocksdb/rdb_perf_context.h
+++ b/storage/rocksdb/rdb_perf_context.h
@@ -37,8 +37,13 @@ enum {
PC_BLOCK_READ_TIME,
PC_BLOCK_CHECKSUM_TIME,
PC_BLOCK_DECOMPRESS_TIME,
+ PC_GET_READ_BYTES,
+ PC_MULTIGET_READ_BYTES,
+ PC_ITER_READ_BYTES,
PC_KEY_SKIPPED,
PC_DELETE_SKIPPED,
+ PC_RECENT_SKIPPED,
+ PC_MERGE,
PC_GET_SNAPSHOT_TIME,
PC_GET_FROM_MEMTABLE_TIME,
PC_GET_FROM_MEMTABLE_COUNT,
@@ -46,9 +51,12 @@ enum {
PC_GET_FROM_OUTPUT_FILES_TIME,
PC_SEEK_ON_MEMTABLE_TIME,
PC_SEEK_ON_MEMTABLE_COUNT,
+ PC_NEXT_ON_MEMTABLE_COUNT,
+ PC_PREV_ON_MEMTABLE_COUNT,
PC_SEEK_CHILD_SEEK_TIME,
PC_SEEK_CHILD_SEEK_COUNT,
PC_SEEK_MIN_HEAP_TIME,
+ PC_SEEK_MAX_HEAP_TIME,
PC_SEEK_INTERNAL_SEEK_TIME,
PC_FIND_NEXT_USER_ENTRY_TIME,
PC_WRITE_WAL_TIME,
@@ -64,6 +72,12 @@ enum {
PC_NEW_TABLE_ITERATOR_NANOS,
PC_BLOCK_SEEK_NANOS,
PC_FIND_TABLE_NANOS,
+ PC_BLOOM_MEMTABLE_HIT_COUNT,
+ PC_BLOOM_MEMTABLE_MISS_COUNT,
+ PC_BLOOM_SST_HIT_COUNT,
+ PC_BLOOM_SST_MISS_COUNT,
+ PC_KEY_LOCK_WAIT_TIME,
+ PC_KEY_LOCK_WAIT_COUNT,
PC_IO_THREAD_POOL_ID,
PC_IO_BYTES_WRITTEN,
PC_IO_BYTES_READ,
diff --git a/storage/rocksdb/rdb_psi.cc b/storage/rocksdb/rdb_psi.cc
index b6bc89a02f9..b5309df5973 100644
--- a/storage/rocksdb/rdb_psi.cc
+++ b/storage/rocksdb/rdb_psi.cc
@@ -48,7 +48,7 @@ my_core::PSI_thread_info all_rocksdb_threads[] = {
my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, rdb_signal_bg_psi_mutex_key,
rdb_signal_drop_idx_psi_mutex_key, rdb_collation_data_mutex_key,
rdb_mem_cmp_space_mutex_key, key_mutex_tx_list, rdb_sysvars_psi_mutex_key,
- rdb_cfm_mutex_key;
+ rdb_cfm_mutex_key, rdb_sst_commit_key;
my_core::PSI_mutex_info all_rocksdb_mutexes[] = {
{&rdb_psi_open_tbls_mutex_key, "open tables", PSI_FLAG_GLOBAL},
@@ -60,6 +60,7 @@ my_core::PSI_mutex_info all_rocksdb_mutexes[] = {
{&key_mutex_tx_list, "tx_list", PSI_FLAG_GLOBAL},
{&rdb_sysvars_psi_mutex_key, "setting sysvar", PSI_FLAG_GLOBAL},
{&rdb_cfm_mutex_key, "column family manager", PSI_FLAG_GLOBAL},
+ {&rdb_sst_commit_key, "sst commit", PSI_FLAG_GLOBAL},
};
my_core::PSI_rwlock_key key_rwlock_collation_exception_list,
diff --git a/storage/rocksdb/rdb_psi.h b/storage/rocksdb/rdb_psi.h
index 0a62f411ade..d4318ee3dba 100644
--- a/storage/rocksdb/rdb_psi.h
+++ b/storage/rocksdb/rdb_psi.h
@@ -40,7 +40,8 @@ extern my_core::PSI_thread_key rdb_background_psi_thread_key,
extern my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key,
rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key,
rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key,
- key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key;
+ key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key,
+ rdb_sst_commit_key;
extern my_core::PSI_rwlock_key key_rwlock_collation_exception_list,
key_rwlock_read_free_rpl_tables, key_rwlock_skip_unique_check_tables;
diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc
index 72abfab5d6b..e0dfb011f87 100644
--- a/storage/rocksdb/rdb_sst_info.cc
+++ b/storage/rocksdb/rdb_sst_info.cc
@@ -43,6 +43,7 @@
#include "./ha_rocksdb.h"
#include "./ha_rocksdb_proto.h"
#include "./rdb_cf_options.h"
+#include "./rdb_psi.h"
namespace myrocks {
@@ -262,7 +263,6 @@ rocksdb::Status Rdb_sst_file_ordered::put(const rocksdb::Slice &key,
if (!m_first_key.empty()) {
rocksdb::Slice first_key_slice(m_first_key);
int cmp = m_file.compare(first_key_slice, key);
- DBUG_ASSERT(cmp != 0);
m_use_stack = (cmp > 0);
// Apply the first key to the stack or SST
@@ -326,11 +326,11 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
const rocksdb::DBOptions &db_options,
const bool &tracing)
: m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0),
- m_sst_count(0), m_background_error(HA_EXIT_SUCCESS),
+ m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), m_committed(false),
#if defined(RDB_SST_INFO_USE_THREAD)
m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false),
#endif
- m_sst_file(nullptr), m_tracing(tracing) {
+ m_sst_file(nullptr), m_tracing(tracing), m_print_client_error(true) {
m_prefix = db->GetName() + "/";
std::string normalized_table;
@@ -357,6 +357,7 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
// Set the maximum size to 3 times the cf's target size
m_max_size = cf_descr.options.target_file_size_base * 3;
}
+ mysql_mutex_init(rdb_sst_commit_key, &m_commit_mutex, MY_MUTEX_INIT_FAST);
}
Rdb_sst_info::~Rdb_sst_info() {
@@ -364,6 +365,7 @@ Rdb_sst_info::~Rdb_sst_info() {
#if defined(RDB_SST_INFO_USE_THREAD)
DBUG_ASSERT(m_thread == nullptr);
#endif
+ mysql_mutex_destroy(&m_commit_mutex);
}
int Rdb_sst_info::open_new_sst_file() {
@@ -428,6 +430,8 @@ void Rdb_sst_info::close_curr_sst_file() {
int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
int rc;
+ DBUG_ASSERT(!m_committed);
+
if (m_curr_size + key.size() + value.size() >= m_max_size) {
// The current sst file has reached its maximum, close it out
close_curr_sst_file();
@@ -461,7 +465,21 @@ int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
return HA_EXIT_SUCCESS;
}
-int Rdb_sst_info::commit() {
+int Rdb_sst_info::commit(bool print_client_error) {
+ int ret = HA_EXIT_SUCCESS;
+
+ // Both the transaction clean up and the ha_rocksdb handler have
+ // references to this Rdb_sst_info and both can call commit, so
+ // synchronize on the object here.
+ RDB_MUTEX_LOCK_CHECK(m_commit_mutex);
+
+ if (m_committed) {
+ RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
+ return ret;
+ }
+
+ m_print_client_error = print_client_error;
+
if (m_curr_size > 0) {
// Close out any existing files
close_curr_sst_file();
@@ -480,16 +498,24 @@ int Rdb_sst_info::commit() {
}
#endif
+ m_committed = true;
+ RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
+
// Did we get any errors?
if (have_background_error()) {
- return get_and_reset_background_error();
+ ret = get_and_reset_background_error();
}
- return HA_EXIT_SUCCESS;
+ m_print_client_error = true;
+ return ret;
}
void Rdb_sst_info::set_error_msg(const std::string &sst_file_name,
const rocksdb::Status &s) {
+
+ if (!m_print_client_error)
+ return;
+
#if defined(RDB_SST_INFO_USE_THREAD)
// Both the foreground and background threads can set the error message
// so lock the mutex to protect it. We only want the first error that
diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h
index 1dee0fd0518..42f6458e46b 100644
--- a/storage/rocksdb/rdb_sst_info.h
+++ b/storage/rocksdb/rdb_sst_info.h
@@ -128,6 +128,8 @@ class Rdb_sst_info {
std::string m_prefix;
static std::atomic<uint64_t> m_prefix_counter;
static std::string m_suffix;
+ bool m_committed;
+ mysql_mutex_t m_commit_mutex;
#if defined(RDB_SST_INFO_USE_THREAD)
std::queue<Rdb_sst_file_ordered *> m_queue;
std::mutex m_mutex;
@@ -137,6 +139,7 @@ class Rdb_sst_info {
#endif
Rdb_sst_file_ordered *m_sst_file;
const bool m_tracing;
+ bool m_print_client_error;
int open_new_sst_file();
void close_curr_sst_file();
@@ -157,7 +160,8 @@ class Rdb_sst_info {
~Rdb_sst_info();
int put(const rocksdb::Slice &key, const rocksdb::Slice &value);
- int commit();
+ int commit(bool print_client_error = true);
+ bool is_committed() const { return m_committed; }
bool have_background_error() { return m_background_error != 0; }
diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc
index 335676a6ba4..19469d041b3 100644
--- a/storage/rocksdb/rdb_utils.cc
+++ b/storage/rocksdb/rdb_utils.cc
@@ -352,4 +352,30 @@ const char *get_rocksdb_supported_compression_types()
return compression_methods_buf.c_str();
}
+/*
+  Reports whether the corruption marker file is present.
+
+  @return true when the F_OK check by my_access() on the marker file
+          succeeds, false otherwise
+*/
+bool rdb_check_rocksdb_corruption() {
+  const std::string marker_path = myrocks::rdb_corruption_marker_file_name();
+  return my_access(marker_path.c_str(), F_OK) == 0;
+}
+
+/*
+  Creates the corruption marker file (the same path that
+  rdb_check_rocksdb_corruption() looks for) and logs the outcome.
+
+  Failures are only reported through the error log; nothing is returned to
+  the caller.
+*/
+void rdb_persist_corruption_marker() {
+  const std::string &fileName(myrocks::rdb_corruption_marker_file_name());
+  int fd = my_open(fileName.c_str(), O_CREAT | O_SYNC, MYF(MY_WME));
+  if (fd < 0) {
+    sql_print_error("RocksDB: Can't create file %s to mark rocksdb as "
+                    "corrupted.",
+                    fileName.c_str());
+    /* Nothing was opened, so there is no descriptor to close */
+    return;
+  }
+
+  sql_print_information("RocksDB: Creating the file %s to abort mysqld "
+                        "restarts. Remove this file from the data directory "
+                        "after fixing the corruption to recover. ",
+                        fileName.c_str());
+
+  int ret = my_close(fd, MYF(MY_WME));
+  if (ret) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Error (%d) closing the file %s", ret,
+                    fileName.c_str());
+  }
+}
+
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h
index 3feda5d82ad..3125941ee78 100644
--- a/storage/rocksdb/rdb_utils.h
+++ b/storage/rocksdb/rdb_utils.h
@@ -84,7 +84,7 @@ namespace myrocks {
do { \
if (!(expr)) { \
my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \
- abort_with_stack_traces(); \
+ abort(); \
} \
} while (0)
#endif // SHIP_ASSERT
@@ -250,12 +250,20 @@ inline void rdb_check_mutex_call_result(const char *function_name,
// This will hopefully result in a meaningful stack trace which we can use
// to efficiently debug the root cause.
- abort_with_stack_traces();
+ abort();
}
}
void rdb_log_status_error(const rocksdb::Status &s, const char *msg = nullptr);
+// Returns true if the corruption marker file exists, which indicates that
+// corruption has been detected earlier.
+bool rdb_check_rocksdb_corruption();
+
+// Stores a marker file in the data directory so that, after a restart, the
+// server is still aware that the rocksdb data is corrupted. Errors are
+// written to the error log only.
+void rdb_persist_corruption_marker();
+
/*
Helper functions to parse strings.
*/
diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb
-Subproject 9a970c81af9807071bd690f4c808c5045866291
+Subproject ba295cda29daee3ffe58549542804efdfd96978