diff options
Diffstat (limited to 'storage')
141 files changed, 5177 insertions, 1716 deletions
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index bf95201fb4b..7156150b042 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -95,6 +95,8 @@ SET(ROCKSDB_SE_SOURCES rdb_threads.h rdb_psi.h rdb_psi.cc + rdb_sst_info.cc + rdb_sst_info.h ) # MariaDB: the following is added in build_rocksdb.cmake, when appropriate: @@ -137,8 +139,6 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib event_listener.h rdb_perf_context.cc rdb_perf_context.h - rdb_sst_info.cc - rdb_sst_info.h rdb_buff.h rdb_mariadb_port.h ) @@ -178,9 +178,9 @@ IF(HAVE_SCHED_GETCPU) ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1) ENDIF() -IF (NOT "$ENV{WITH_TBB}" STREQUAL "") +IF (WITH_TBB) SET(rocksdb_static_libs ${rocksdb_static_libs} - $ENV{WITH_TBB}/libtbb${PIC_EXT}.a) + ${WITH_TBB}/lib/libtbb${PIC_EXT}.a) ADD_DEFINITIONS(-DTBB) ENDIF() diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake index 5810412f566..c76f711463e 100644 --- a/storage/rocksdb/build_rocksdb.cmake +++ b/storage/rocksdb/build_rocksdb.cmake @@ -12,11 +12,6 @@ INCLUDE_DIRECTORIES( ${ROCKSDB_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src ) -IF(WIN32) - INCLUDE_DIRECTORIES(BEFORE - ${CMAKE_CURRENT_SOURCE_DIR}/patch) -ENDIF() - list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") if(WIN32) @@ -245,6 +240,7 @@ set(ROCKSDB_SOURCES table/block_based_table_factory.cc table/block_based_table_reader.cc table/block_builder.cc + table/block_fetcher.cc table/block_prefix_index.cc table/bloom_block.cc table/cuckoo_table_builder.cc @@ -340,15 +336,16 @@ set(ROCKSDB_SOURCES utilities/transactions/optimistic_transaction_db_impl.cc utilities/transactions/pessimistic_transaction.cc utilities/transactions/pessimistic_transaction_db.cc + utilities/transactions/snapshot_checker.cc utilities/transactions/transaction_base.cc utilities/transactions/transaction_db_mutex_impl.cc utilities/transactions/transaction_lock_mgr.cc utilities/transactions/transaction_util.cc 
utilities/transactions/write_prepared_txn.cc + utilities/transactions/write_prepared_txn_db.cc utilities/ttl/db_ttl_impl.cc utilities/write_batch_with_index/write_batch_with_index.cc utilities/write_batch_with_index/write_batch_with_index_internal.cc - ) if(WIN32) diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc index 5854da54210..1a621b5df4b 100644 --- a/storage/rocksdb/event_listener.cc +++ b/storage/rocksdb/event_listener.cc @@ -83,4 +83,14 @@ void Rdb_event_listener::OnExternalFileIngested( DBUG_ASSERT(db != nullptr); update_index_stats(info.table_properties); } + +void Rdb_event_listener::OnBackgroundError( + rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) { + rdb_log_status_error(*status, "Error detected in background"); + sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason); + if (status->IsCorruption()) { + rdb_persist_corruption_marker(); + abort(); + } +} } // namespace myrocks diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h index d535031644b..8772105de36 100644 --- a/storage/rocksdb/event_listener.h +++ b/storage/rocksdb/event_listener.h @@ -37,6 +37,9 @@ public: rocksdb::DB *db, const rocksdb::ExternalFileIngestionInfo &ingestion_info) override; + void OnBackgroundError(rocksdb::BackgroundErrorReason reason, + rocksdb::Status *status) override; + private: Rdb_ddl_manager *m_ddl_manager; diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 03086db3da9..baada708c61 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -144,10 +144,6 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton, my_core::TABLE_SHARE *table_arg, my_core::MEM_ROOT *mem_root); -bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, - const rocksdb::Slice &eq_cond, - const bool use_all_keys); - static rocksdb::CompactRangeOptions getCompactRangeOptions() { rocksdb::CompactRangeOptions compact_range_options; 
compact_range_options.bottommost_level_compaction = @@ -423,24 +419,37 @@ static void rocksdb_set_collation_exception_list(THD *thd, void *var_ptr, const void *save); -void rocksdb_set_update_cf_options(THD *thd, - struct st_mysql_sys_var *var, - void *var_ptr, - const void *save); +static int rocksdb_validate_update_cf_options(THD *thd, + struct st_mysql_sys_var *var, + void *save, + st_mysql_value *value); -static void -rocksdb_set_bulk_load(THD *thd, - struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), - void *var_ptr, const void *save); +static void rocksdb_set_update_cf_options(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); + +static int rocksdb_check_bulk_load(THD *const thd, + struct st_mysql_sys_var *var + MY_ATTRIBUTE((__unused__)), + void *save, + struct st_mysql_value *value); -static void rocksdb_set_bulk_load_allow_unsorted( - THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), - void *var_ptr, const void *save); +static int rocksdb_check_bulk_load_allow_unsorted( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value); static void rocksdb_set_max_background_jobs(THD *thd, struct st_mysql_sys_var *const var, void *const var_ptr, const void *const save); +static void rocksdb_set_bytes_per_sync(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save); +static void rocksdb_set_wal_bytes_per_sync(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save); ////////////////////////////////////////////////////////////////////////////// // Options definitions ////////////////////////////////////////////////////////////////////////////// @@ -471,6 +480,7 @@ static char *rocksdb_compact_cf_name; static char *rocksdb_checkpoint_name; static my_bool rocksdb_signal_drop_index_thread; static my_bool rocksdb_strict_collation_check = 1; +static my_bool 
rocksdb_ignore_unknown_options = 1; static my_bool rocksdb_enable_2pc = 0; static char *rocksdb_strict_collation_exceptions; static my_bool rocksdb_collect_sst_properties = 1; @@ -484,7 +494,6 @@ static int rocksdb_debug_ttl_read_filter_ts = 0; static my_bool rocksdb_debug_ttl_ignore_pk = 0; static my_bool rocksdb_reset_stats = 0; static uint32_t rocksdb_io_write_timeout_secs = 0; -static uint64_t rocksdb_number_stat_computes = 0; static uint32_t rocksdb_seconds_between_stat_computes = 3600; static long long rocksdb_compaction_sequential_deletes = 0l; static long long rocksdb_compaction_sequential_deletes_window = 0l; @@ -495,11 +504,14 @@ static uint32_t rocksdb_table_stats_sampling_pct; static my_bool rocksdb_enable_bulk_load_api = 1; static my_bool rocksdb_print_snapshot_conflict_queries = 0; static my_bool rocksdb_large_prefix = 0; +static my_bool rocksdb_allow_to_start_after_corruption = 0; static char* rocksdb_git_hash; char *compression_types_val= const_cast<char*>(get_rocksdb_supported_compression_types()); +std::atomic<uint64_t> rocksdb_row_lock_deadlocks(0); +std::atomic<uint64_t> rocksdb_row_lock_wait_timeouts(0); std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0); std::atomic<uint64_t> rocksdb_wal_group_syncs(0); @@ -510,8 +522,9 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) { o->listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager)); o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL; o->max_subcompactions = DEFAULT_SUBCOMPACTIONS; + o->max_open_files = -2; // auto-tune to 50% open_files_limit - o->concurrent_prepare = true; + o->two_write_queues = true; o->manual_wal_flush = true; return o; } @@ -593,6 +606,33 @@ static void rocksdb_set_io_write_timeout( RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); } +enum rocksdb_flush_log_at_trx_commit_type : unsigned int { + FLUSH_LOG_NEVER = 0, + FLUSH_LOG_SYNC, + FLUSH_LOG_BACKGROUND, + FLUSH_LOG_MAX /* must be last */ +}; + +static int 
rocksdb_validate_flush_log_at_trx_commit( + THD *const thd, + struct st_mysql_sys_var *const var, /* in: pointer to system variable */ + void *var_ptr, /* out: immediate result for update function */ + struct st_mysql_value *const value /* in: incoming value */) { + long long new_value; + + /* value is NULL */ + if (value->val_int(value, &new_value)) { + return HA_EXIT_FAILURE; + } + + if (rocksdb_db_options->allow_mmap_writes && new_value != FLUSH_LOG_NEVER) { + return HA_EXIT_FAILURE; + } + + *static_cast<uint32_t *>(var_ptr) = static_cast<uint32_t>(new_value); + return HA_EXIT_SUCCESS; +} + static const char *index_type_names[] = {"kBinarySearch", "kHashSearch", NullS}; static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1, @@ -600,7 +640,7 @@ static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1, nullptr}; const ulong RDB_MAX_LOCK_WAIT_SECONDS = 1024 * 1024 * 1024; -const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024 * 1024; +const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024; const ulong RDB_DEFAULT_BULK_LOAD_SIZE = 1000; const ulong RDB_MAX_BULK_LOAD_SIZE = 1024 * 1024 * 1024; const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024; @@ -640,12 +680,13 @@ static MYSQL_THDVAR_BOOL( bulk_load, PLUGIN_VAR_RQCMDARG, "Use bulk-load mode for inserts. This disables " "unique_checks and enables rocksdb_commit_in_the_middle.", - nullptr, rocksdb_set_bulk_load, FALSE); + rocksdb_check_bulk_load, nullptr, FALSE); static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG, "Allow unsorted input during bulk-load. 
" "Can be changed only when bulk load is disabled.", - nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE); + rocksdb_check_bulk_load_allow_unsorted, nullptr, + FALSE); static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -751,11 +792,11 @@ static MYSQL_SYSVAR_BOOL( rocksdb_db_options->create_if_missing); static MYSQL_SYSVAR_BOOL( - concurrent_prepare, - *reinterpret_cast<my_bool *>(&rocksdb_db_options->concurrent_prepare), + two_write_queues, + *reinterpret_cast<my_bool *>(&rocksdb_db_options->two_write_queues), PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr, - rocksdb_db_options->concurrent_prepare); + "DBOptions::two_write_queues for RocksDB", nullptr, nullptr, + rocksdb_db_options->two_write_queues); static MYSQL_SYSVAR_BOOL( manual_wal_flush, @@ -882,7 +923,7 @@ static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options->max_open_files, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "DBOptions::max_open_files for RocksDB", nullptr, nullptr, rocksdb_db_options->max_open_files, - /* min */ -1, /* max */ INT_MAX, 0); + /* min */ -2, /* max */ INT_MAX, 0); static MYSQL_SYSVAR_UINT64_T(max_total_wal_size, rocksdb_db_options->max_total_wal_size, @@ -1063,16 +1104,18 @@ static MYSQL_SYSVAR_BOOL( rocksdb_db_options->use_adaptive_mutex); static MYSQL_SYSVAR_UINT64_T(bytes_per_sync, rocksdb_db_options->bytes_per_sync, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG, "DBOptions::bytes_per_sync for RocksDB", nullptr, - nullptr, rocksdb_db_options->bytes_per_sync, + rocksdb_set_bytes_per_sync, + rocksdb_db_options->bytes_per_sync, /* min */ 0L, /* max */ ULONGLONG_MAX, 0); static MYSQL_SYSVAR_UINT64_T(wal_bytes_per_sync, rocksdb_db_options->wal_bytes_per_sync, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG, "DBOptions::wal_bytes_per_sync for RocksDB", nullptr, - nullptr, 
rocksdb_db_options->wal_bytes_per_sync, + rocksdb_set_wal_bytes_per_sync, + rocksdb_db_options->wal_bytes_per_sync, /* min */ 0L, /* max */ ULONGLONG_MAX, 0); static MYSQL_SYSVAR_BOOL( @@ -1190,22 +1233,17 @@ static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options, static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC /* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/, - "Option updates per column family for RocksDB", nullptr, + "Option updates per column family for RocksDB", + rocksdb_validate_update_cf_options, rocksdb_set_update_cf_options, nullptr); -enum rocksdb_flush_log_at_trx_commit_type : unsigned int { - FLUSH_LOG_NEVER = 0, - FLUSH_LOG_SYNC, - FLUSH_LOG_BACKGROUND, - FLUSH_LOG_MAX /* must be last */ -}; - static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit, rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG, "Sync on transaction commit. Similar to " "innodb_flush_log_at_trx_commit. 1: sync on commit, " "0,2: not sync on commit", - nullptr, nullptr, /* default */ FLUSH_LOG_SYNC, + rocksdb_validate_flush_log_at_trx_commit, nullptr, + /* default */ FLUSH_LOG_SYNC, /* min */ FLUSH_LOG_NEVER, /* max */ FLUSH_LOG_BACKGROUND, 0); @@ -1346,6 +1384,11 @@ static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG, "Enable two phase commit for MyRocks", nullptr, nullptr, TRUE); +static MYSQL_SYSVAR_BOOL(ignore_unknown_options, rocksdb_ignore_unknown_options, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable ignoring unknown options passed to RocksDB", + nullptr, nullptr, TRUE); + static MYSQL_SYSVAR_BOOL(strict_collation_check, rocksdb_strict_collation_check, PLUGIN_VAR_RQCMDARG, "Enforce case sensitive collation for MyRocks indexes", @@ -1377,11 +1420,6 @@ static MYSQL_SYSVAR_BOOL( rocksdb_force_flush_memtable_and_lzero_now, rocksdb_force_flush_memtable_and_lzero_now_stub, FALSE); -static MYSQL_THDVAR_BOOL( - flush_memtable_on_analyze, PLUGIN_VAR_RQCMDARG, - "Forces 
memtable flush on ANALZYE table to get accurate cardinality", - nullptr, nullptr, true); - static MYSQL_SYSVAR_UINT( seconds_between_stat_computes, rocksdb_seconds_between_stat_computes, PLUGIN_VAR_RQCMDARG, @@ -1498,6 +1536,13 @@ static MYSQL_SYSVAR_BOOL( "index prefix length is 767.", nullptr, nullptr, FALSE); +static MYSQL_SYSVAR_BOOL( + allow_to_start_after_corruption, rocksdb_allow_to_start_after_corruption, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Allow server still to start successfully even if RocksDB corruption is " + "detected.", + nullptr, nullptr, FALSE); + static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; static struct st_mysql_sys_var *rocksdb_system_variables[] = { @@ -1523,7 +1568,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(skip_bloom_filter_on_read), MYSQL_SYSVAR(create_if_missing), - MYSQL_SYSVAR(concurrent_prepare), + MYSQL_SYSVAR(two_write_queues), MYSQL_SYSVAR(manual_wal_flush), MYSQL_SYSVAR(create_missing_column_families), MYSQL_SYSVAR(error_if_exists), @@ -1605,6 +1650,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(signal_drop_index_thread), MYSQL_SYSVAR(pause_background_work), MYSQL_SYSVAR(enable_2pc), + MYSQL_SYSVAR(ignore_unknown_options), MYSQL_SYSVAR(strict_collation_check), MYSQL_SYSVAR(strict_collation_exceptions), MYSQL_SYSVAR(collect_sst_properties), @@ -1618,7 +1664,6 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(debug_ttl_ignore_pk), MYSQL_SYSVAR(reset_stats), MYSQL_SYSVAR(io_write_timeout), - MYSQL_SYSVAR(flush_memtable_on_analyze), MYSQL_SYSVAR(seconds_between_stat_computes), MYSQL_SYSVAR(compaction_sequential_deletes), @@ -1640,6 +1685,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(table_stats_sampling_pct), MYSQL_SYSVAR(large_prefix), + MYSQL_SYSVAR(allow_to_start_after_corruption), MYSQL_SYSVAR(git_hash), nullptr}; @@ -1768,6 +1814,7 @@ protected: ulonglong m_update_count = 
0; ulonglong m_delete_count = 0; ulonglong m_lock_count = 0; + std::unordered_map<GL_INDEX_ID, ulonglong> m_auto_incr_map; bool m_is_delayed_snapshot = false; bool m_is_two_phase = false; @@ -1819,7 +1866,30 @@ protected: virtual void do_set_savepoint() = 0; virtual void do_rollback_to_savepoint() = 0; -public: + /* + @detail + This function takes in the WriteBatch of the transaction to add + all the AUTO_INCREMENT merges. It does so by iterating through + m_auto_incr_map and then constructing key/value pairs to call merge upon. + + @param wb + */ + rocksdb::Status merge_auto_incr_map(rocksdb::WriteBatchBase *const wb) { + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", return rocksdb::Status::OK();); + + // Iterate through the merge map merging all keys into data dictionary. + rocksdb::Status s; + for (auto &it : m_auto_incr_map) { + s = dict_manager.put_auto_incr_val(wb, it.first, it.second); + if (!s.ok()) { + return s; + } + } + m_auto_incr_map.clear(); + return s; + } + + public: const char *m_mysql_log_file_name; my_off_t m_mysql_log_offset; #ifdef MARIAROCKS_NOT_YET @@ -1888,6 +1958,7 @@ public: m_detailed_error.copy(timeout_message( "index", tbl_def->full_tablename().c_str(), kd.get_name().c_str())); table_handler->m_lock_wait_timeout_counter.inc(); + rocksdb_row_lock_wait_timeouts++; return HA_ERR_LOCK_WAIT_TIMEOUT; } @@ -1897,6 +1968,7 @@ public: false /* just statement */); m_detailed_error = String(); table_handler->m_deadlock_counter.inc(); + rocksdb_row_lock_deadlocks++; return HA_ERR_LOCK_DEADLOCK; } else if (s.IsBusy()) { rocksdb_snapshot_conflict_errors++; @@ -2026,10 +2098,16 @@ public: rollback(); return true; } else { +#ifdef MARIAROCKS_NOT_YET + /* + Storing binlog position inside MyRocks is needed only for restoring + MyRocks from backups. This feature is not supported yet. 
+ */ mysql_bin_log_commit_pos(m_thd, &m_mysql_log_offset, &m_mysql_log_file_name); binlog_manager.update(m_mysql_log_file_name, m_mysql_log_offset, get_write_batch()); +#endif return commit_no_binlog(); } } @@ -2050,28 +2128,110 @@ public: bool has_snapshot() const { return m_read_opts.snapshot != nullptr; } private: - // The tables we are currently loading. In a partitioned table this can - // have more than one entry - std::vector<ha_rocksdb *> m_curr_bulk_load; + // The Rdb_sst_info structures we are currently loading. In a partitioned + // table this can have more than one entry + std::vector<std::shared_ptr<Rdb_sst_info>> m_curr_bulk_load; + std::string m_curr_bulk_load_tablename; + + /* External merge sorts for bulk load: key ID -> merge sort instance */ + std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge; public: - int finish_bulk_load() { - int rc = 0; + int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf, + Rdb_index_merge **key_merge) { + int res; + auto it = m_key_merge.find(kd_gl_id); + if (it == m_key_merge.end()) { + m_key_merge.emplace( + std::piecewise_construct, std::make_tuple(kd_gl_id), + std::make_tuple( + get_rocksdb_tmpdir(), THDVAR(get_thd(), merge_buf_size), + THDVAR(get_thd(), merge_combine_read_size), + THDVAR(get_thd(), merge_tmp_file_removal_delay_ms), cf)); + it = m_key_merge.find(kd_gl_id); + if ((res = it->second.init()) != 0) { + return res; + } + } + *key_merge = &it->second; + return HA_EXIT_SUCCESS; + } - std::vector<ha_rocksdb *>::iterator it; - while ((it = m_curr_bulk_load.begin()) != m_curr_bulk_load.end()) { - int rc2 = (*it)->finalize_bulk_load(); + int finish_bulk_load(int print_client_error = true) { + int rc = 0, rc2; + + std::vector<std::shared_ptr<Rdb_sst_info>>::iterator it; + for (it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); it++) { + rc2 = (*it)->commit(print_client_error); if (rc2 != 0 && rc == 0) { rc = rc2; } } - + m_curr_bulk_load.clear(); + 
m_curr_bulk_load_tablename.clear(); DBUG_ASSERT(m_curr_bulk_load.size() == 0); + // Flush the index_merge sort buffers + if (!m_key_merge.empty()) { + rocksdb::Slice merge_key; + rocksdb::Slice merge_val; + for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { + GL_INDEX_ID index_id = it->first; + std::shared_ptr<const Rdb_key_def> keydef = + ddl_manager.safe_find(index_id); + std::string table_name = ddl_manager.safe_get_table_name(index_id); + + // Unable to find key definition or table name since the + // table could have been dropped. + // TODO(herman): there is a race here between dropping the table + // and detecting a drop here. If the table is dropped while bulk + // loading is finishing, these keys being added here may + // be missed by the compaction filter and not be marked for + // removal. It is unclear how to lock the sql table from the storage + // engine to prevent modifications to it while bulk load is occurring. + if (keydef == nullptr || table_name.empty()) { + rc2 = HA_ERR_ROCKSDB_BULK_LOAD; + break; + } + const std::string &index_name = keydef->get_name(); + Rdb_index_merge &rdb_merge = it->second; + + // Rdb_sst_info expects a denormalized table name in the form of + // "./database/table" + std::replace(table_name.begin(), table_name.end(), '.', '/'); + table_name = "./" + table_name; + Rdb_sst_info sst_info(rdb, table_name, index_name, rdb_merge.get_cf(), + *rocksdb_db_options, + THDVAR(get_thd(), trace_sst_api)); + + while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) { + if ((rc2 = sst_info.put(merge_key, merge_val)) != 0) { + break; + } + } + + // rc2 == -1 => finished ok; rc2 > 0 => error + if (rc2 > 0 || (rc2 = sst_info.commit(print_client_error)) != 0) { + if (rc == 0) { + rc = rc2; + } + break; + } + } + m_key_merge.clear(); + + /* + Explicitly tell jemalloc to clean up any unused dirty pages at this + point. + See https://reviews.facebook.net/D63723 for more details. 
+ */ + purge_all_jemalloc_arenas(); + } return rc; } - void start_bulk_load(ha_rocksdb *const bulk_load) { + int start_bulk_load(ha_rocksdb *const bulk_load, + std::shared_ptr<Rdb_sst_info> sst_info) { /* If we already have an open bulk load of a table and the name doesn't match the current one, close out the currently running one. This allows @@ -2081,29 +2241,46 @@ public: DBUG_ASSERT(bulk_load != nullptr); if (!m_curr_bulk_load.empty() && - !bulk_load->same_table(*m_curr_bulk_load[0])) { + bulk_load->get_table_basename() != m_curr_bulk_load_tablename) { const auto res = finish_bulk_load(); - SHIP_ASSERT(res == 0); - } - - m_curr_bulk_load.push_back(bulk_load); - } - - void end_bulk_load(ha_rocksdb *const bulk_load) { - for (auto it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); - it++) { - if (*it == bulk_load) { - m_curr_bulk_load.erase(it); - return; + if (res != HA_EXIT_SUCCESS) { + m_curr_bulk_load.clear(); + m_curr_bulk_load_tablename.clear(); + return res; } } - // Should not reach here - SHIP_ASSERT(0); + /* + This used to track ha_rocksdb handler objects, but those can be + freed by the table cache while this was referencing them. Instead + of tracking ha_rocksdb handler objects, this now tracks the + Rdb_sst_info allocated, and both the ha_rocksdb handler and the + Rdb_transaction both have shared pointers to them. + + On transaction complete, it will commit each Rdb_sst_info structure found. + If the ha_rocksdb object is freed, etc., it will also commit + the Rdb_sst_info. The Rdb_sst_info commit path needs to be idempotent. 
+ */ + m_curr_bulk_load.push_back(sst_info); + m_curr_bulk_load_tablename = bulk_load->get_table_basename(); + return HA_EXIT_SUCCESS; } int num_ongoing_bulk_load() const { return m_curr_bulk_load.size(); } + const char *get_rocksdb_tmpdir() const { + const char *tmp_dir = THDVAR(get_thd(), tmpdir); + + /* + We want to treat an empty string as nullptr, in these cases DDL operations + will use the default --tmpdir passed to mysql instead. + */ + if (tmp_dir != nullptr && *tmp_dir == '\0') { + tmp_dir = nullptr; + } + return (tmp_dir); + } + /* Flush the data accumulated so far. This assumes we're doing a bulk insert. @@ -2130,6 +2307,20 @@ public: return false; } + void set_auto_incr(const GL_INDEX_ID &gl_index_id, ulonglong curr_id) { + m_auto_incr_map[gl_index_id] = + std::max(m_auto_incr_map[gl_index_id], curr_id); + } + +#ifndef NDEBUG + ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) { + if (m_auto_incr_map.count(gl_index_id) > 0) { + return m_auto_incr_map[gl_index_id]; + } + return 0; + } +#endif + virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, const rocksdb::Slice &value) = 0; @@ -2153,15 +2344,17 @@ public: virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - std::string *value) const = 0; + rocksdb::PinnableSlice *const value) const = 0; virtual rocksdb::Status get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, bool exclusive) = 0; rocksdb::Iterator * get_iterator(rocksdb::ColumnFamilyHandle *const column_family, bool skip_bloom_filter, bool fill_cache, + const rocksdb::Slice &eq_cond_lower_bound, + const rocksdb::Slice &eq_cond_upper_bound, bool read_current = false, bool create_snapshot = true) { // Make sure we are not doing both read_current (which implies we don't // want a snapshot) and 
create_snapshot which makes sure we create @@ -2176,6 +2369,8 @@ public: if (skip_bloom_filter) { options.total_order_seek = true; + options.iterate_lower_bound = &eq_cond_lower_bound; + options.iterate_upper_bound = &eq_cond_upper_bound; } else { // With this option, Iterator::Valid() returns false if key // is outside of the prefix bloom filter range set at Seek(). @@ -2333,6 +2528,12 @@ private: return false; } + s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + return false; + } + s = m_rocksdb_tx->Prepare(); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); @@ -2343,13 +2544,24 @@ private: bool commit_no_binlog() override { bool res = false; + rocksdb::Status s; + + s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } + release_snapshot(); - const rocksdb::Status s = m_rocksdb_tx->Commit(); + s = m_rocksdb_tx->Commit(); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); res = true; + goto error; } +error: /* Save the transaction object to be reused */ release_tx(); @@ -2370,6 +2582,7 @@ public: m_update_count = 0; m_delete_count = 0; m_lock_count = 0; + m_auto_incr_map.clear(); m_ddl_transaction = false; if (m_rocksdb_tx) { release_snapshot(); @@ -2471,18 +2684,25 @@ public: rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - std::string *value) const override { + rocksdb::PinnableSlice *const value) const override { + // clean PinnableSlice right begfore Get() for multiple gets per statement + // the resources after the last Get in a statement are cleared in + // handler::reset call + value->Reset(); global_stats.queries[QUERIES_POINT].inc(); return m_rocksdb_tx->Get(m_read_opts, column_family, key, value); } rocksdb::Status get_for_update(rocksdb::ColumnFamilyHandle *const 
column_family, - const rocksdb::Slice &key, std::string *const value, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, bool exclusive) override { if (++m_lock_count > m_max_row_locks) return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + if (value != nullptr) { + value->Reset(); + } return m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value, exclusive); } @@ -2620,13 +2840,24 @@ private: bool commit_no_binlog() override { bool res = false; + rocksdb::Status s; + + s = merge_auto_incr_map(m_batch->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } + release_snapshot(); - const rocksdb::Status s = - rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch()); + + s = rdb->GetBaseDB()->Write(write_opts, m_batch->GetWriteBatch()); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); res = true; + goto error; } +error: reset(); m_write_count = 0; @@ -2725,14 +2956,15 @@ public: rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, const rocksdb::Slice &key, - std::string *const value) const override { + rocksdb::PinnableSlice *const value) const override { + value->Reset(); return m_batch->GetFromBatchAndDB(rdb, m_read_opts, column_family, key, value); } rocksdb::Status get_for_update(rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, bool exclusive) override { return get(column_family, key, value); } @@ -2863,13 +3095,12 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) { static int rocksdb_close_connection(handlerton *const hton, THD *const thd) { Rdb_transaction *&tx = get_tx_from_thd(thd); if (tx != nullptr) { - int rc = tx->finish_bulk_load(); + int rc = tx->finish_bulk_load(false); if (rc != 0) { // NO_LINT_DEBUG sql_print_error("RocksDB: Error %d finalizing last SST file while " "disconnecting", rc); - 
abort_with_stack_traces(); } delete tx; @@ -2918,7 +3149,8 @@ static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__))) /* target_lsn is set to 0 when MySQL wants to sync the wal files */ - if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { + if ((target_lsn == 0 && !rocksdb_db_options->allow_mmap_writes) || + rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { rocksdb_wal_group_syncs++; s = rdb->FlushWAL(target_lsn == 0 || rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); @@ -2949,7 +3181,11 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx) /* We were instructed to prepare the whole transaction, or this is an SQL statement end and autocommit is on */ -#ifdef MARIAROCKS_NOT_YET // Crash-safe slave does not work yet +#ifdef MARIAROCKS_NOT_YET + /* + Storing binlog position inside MyRocks is needed only for restoring + MyRocks from backups. This feature is not supported yet. + */ std::vector<st_slave_gtid_info> slave_gtid_info; my_core::thd_slave_gtid_info(thd, &slave_gtid_info); for (const auto &it : slave_gtid_info) { @@ -3403,79 +3639,54 @@ private: "=========================================\n"; } - static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn, - const GL_INDEX_ID &gl_index_id, - bool is_last_path = false) { - std::string txn_data; + static Rdb_deadlock_info::Rdb_dl_trx_info + get_dl_txn_info(const rocksdb::DeadlockInfo &txn, + const GL_INDEX_ID &gl_index_id) { + Rdb_deadlock_info::Rdb_dl_trx_info txn_data; - /* extract table name and index names using the index id */ - std::string table_name = ddl_manager.safe_get_table_name(gl_index_id); - if (table_name.empty()) { - table_name = + txn_data.trx_id = txn.m_txn_id; + + txn_data.table_name = ddl_manager.safe_get_table_name(gl_index_id); + if (txn_data.table_name.empty()) { + txn_data.table_name = "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); } + auto kd = ddl_manager.safe_find(gl_index_id); - 
std::string idx_name = + txn_data.index_name = (kd) ? kd->get_name() : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); - /* get the name of the column family */ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id); - std::string cf_name = cfh->GetName(); - - txn_data += format_string( - "TRANSACTIONID: %u\n" - "COLUMN FAMILY NAME: %s\n" - "WAITING KEY: %s\n" - "LOCK TYPE: %s\n" - "INDEX NAME: %s\n" - "TABLE NAME: %s\n", - txn.m_txn_id, cf_name.c_str(), - rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length()) - .c_str(), - txn.m_exclusive ? "EXCLUSIVE" : "SHARED", idx_name.c_str(), - table_name.c_str()); - if (!is_last_path) { - txn_data += "---------------WAITING FOR---------------\n"; - } + txn_data.cf_name = cfh->GetName(); + + txn_data.waiting_key = + rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length()); + + txn_data.exclusive_lock = txn.m_exclusive; + return txn_data; } - static std::string - get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) { - std::string path_data; - if (path_entry.limit_exceeded) { - path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; - } else { - path_data += "\n*** DEADLOCK PATH\n" - "=========================================\n"; - for (auto it = path_entry.path.begin(); it != path_entry.path.end(); - it++) { - auto txn = *it; - const GL_INDEX_ID gl_index_id = { - txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>( - txn.m_waiting_key.c_str()))}; - path_data += get_dlock_txn_info(txn, gl_index_id); - } + static Rdb_deadlock_info + get_dl_path_trx_info(const rocksdb::DeadlockPath &path_entry) { + Rdb_deadlock_info deadlock_info; - DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); - /* print the first txn in the path to display the full deadlock cycle */ - if (!path_entry.path.empty() && !path_entry.limit_exceeded) { - auto txn = path_entry.path[0]; - const GL_INDEX_ID gl_index_id = { - txn.m_cf_id, 
rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>( - txn.m_waiting_key.c_str()))}; - path_data += get_dlock_txn_info(txn, gl_index_id, true); - - /* prints the txn id of the transaction that caused the deadlock */ - auto deadlocking_txn = *(path_entry.path.end() - 1); - path_data += - format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n", - deadlocking_txn.m_txn_id); - } + for (auto it = path_entry.path.begin(); it != path_entry.path.end(); + it++) { + auto txn = *it; + const GL_INDEX_ID gl_index_id = { + txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>( + txn.m_waiting_key.c_str()))}; + deadlock_info.path.push_back(get_dl_txn_info(txn, gl_index_id)); } - - return path_data; + DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); + /* print the first txn in the path to display the full deadlock cycle */ + if (!path_entry.path.empty() && !path_entry.limit_exceeded) { + auto deadlocking_txn = *(path_entry.path.end() - 1); + deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id; + } + return deadlock_info; } public: @@ -3514,9 +3725,48 @@ private: m_data += "----------LATEST DETECTED DEADLOCKS----------\n"; for (auto path_entry : dlock_buffer) { - m_data += get_dlock_path_info(path_entry); + std::string path_data; + if (path_entry.limit_exceeded) { + path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; + } else { + path_data += "\n*** DEADLOCK PATH\n" + "=========================================\n"; + const auto dl_info = get_dl_path_trx_info(path_entry); + for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) { + const auto trx_info = *it; + path_data += format_string( + "TRANSACTION ID: %u\n" + "COLUMN FAMILY NAME: %s\n" + "WAITING KEY: %s\n" + "LOCK TYPE: %s\n" + "INDEX NAME: %s\n" + "TABLE NAME: %s\n", + trx_info.trx_id, trx_info.cf_name.c_str(), + trx_info.waiting_key.c_str(), + trx_info.exclusive_lock ? 
"EXCLUSIVE" : "SHARED", + trx_info.index_name.c_str(), trx_info.table_name.c_str()); + if (it != dl_info.path.end() - 1) { + path_data += "---------------WAITING FOR---------------\n"; + } + } + path_data += + format_string("\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n", + dl_info.victim_trx_id); + } + m_data += path_data; } } + + std::vector<Rdb_deadlock_info> get_deadlock_info() { + std::vector<Rdb_deadlock_info> deadlock_info; + auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); + for (auto path_entry : dlock_buffer) { + if (!path_entry.limit_exceeded) { + deadlock_info.push_back(get_dl_path_trx_info(path_entry)); + } + } + return deadlock_info; + } }; /** @@ -3605,6 +3855,17 @@ std::vector<Rdb_trx_info> rdb_get_all_trx_info() { return trx_info; } + +/* + returns a vector of info of recent deadlocks + for use by information_schema.rocksdb_deadlock +*/ +std::vector<Rdb_deadlock_info> rdb_get_deadlock_info() { + Rdb_snapshot_status showStatus; + Rdb_transaction::walk_tx_list(&showStatus); + return showStatus.get_deadlock_info(); +} + #ifdef MARIAROCKS_NOT_YET /* Generate the snapshot status table */ static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd, @@ -3911,6 +4172,7 @@ static void rocksdb_update_table_stats( comp_stats_t comp_stats; uint lock_wait_timeout_stats; uint deadlock_stats; + uint lock_wait_stats; std::vector<std::string> tablenames; /* @@ -3957,6 +4219,9 @@ static void rocksdb_update_table_stats( io_perf_write.requests = table_handler->m_io_perf_write.requests.load(); lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load(); deadlock_stats = table_handler->m_deadlock_counter.load(); + lock_wait_stats = + table_handler->m_table_perf_context.m_value[PC_KEY_LOCK_WAIT_COUNT] + .load(); /* Convert from rocksdb timer to mysql timer. 
RocksDB values are @@ -3984,7 +4249,7 @@ static void rocksdb_update_table_stats( sizeof(tablename_sys)); (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read, &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats, - &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats, + &comp_stats, lock_wait_stats, lock_wait_timeout_stats, deadlock_stats, rocksdb_hton_name); } } @@ -3996,8 +4261,9 @@ static rocksdb::Status check_rocksdb_options_compatibility( rocksdb::DBOptions loaded_db_opt; std::vector<rocksdb::ColumnFamilyDescriptor> loaded_cf_descs; - rocksdb::Status status = LoadLatestOptions(dbpath, rocksdb::Env::Default(), - &loaded_db_opt, &loaded_cf_descs); + rocksdb::Status status = + LoadLatestOptions(dbpath, rocksdb::Env::Default(), &loaded_db_opt, + &loaded_cf_descs, rocksdb_ignore_unknown_options); // If we're starting from scratch and there are no options saved yet then this // is a valid case. Therefore we can't compare the current set of options to @@ -4036,7 +4302,8 @@ static rocksdb::Status check_rocksdb_options_compatibility( // This is the essence of the function - determine if it's safe to open the // database or not. status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), main_opts, - loaded_cf_descs); + loaded_cf_descs, + rocksdb_ignore_unknown_options); return status; } @@ -4060,6 +4327,22 @@ static int rocksdb_init_func(void *const p) { DBUG_RETURN(1); } + if (rdb_check_rocksdb_corruption()) { + sql_print_error("RocksDB: There was a corruption detected in RockDB files. " + "Check error log emitted earlier for more details."); + if (rocksdb_allow_to_start_after_corruption) { + sql_print_information( + "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent " + "server operating if RocksDB corruption is detected."); + } else { + sql_print_error("RocksDB: The server will exit normally and stop restart " + "attempts. 
Remove %s file from data directory and " + "start mysqld manually.", + rdb_corruption_marker_file_name().c_str()); + exit(0); + } + } + // Validate the assumption about the size of ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN. static_assert(sizeof(longlong) == 8, "Assuming that longlong is 8 bytes."); @@ -4136,6 +4419,16 @@ static int rocksdb_init_func(void *const p) { rocksdb_hton->tablefile_extensions= ha_rocksdb_exts; DBUG_ASSERT(!mysqld_embedded); + if (rocksdb_db_options->max_open_files > (long)open_files_limit) { + sql_print_information("RocksDB: rocksdb_max_open_files should not be " + "greater than the open_files_limit, effective value " + "of rocksdb_max_open_files is being set to " + "open_files_limit / 2."); + rocksdb_db_options->max_open_files = open_files_limit / 2; + } else if (rocksdb_db_options->max_open_files == -2) { + rocksdb_db_options->max_open_files = open_files_limit / 2; + } + rocksdb_stats = rocksdb::CreateDBStatistics(); rocksdb_db_options->statistics = rocksdb_stats; @@ -4184,14 +4477,20 @@ static int rocksdb_init_func(void *const p) { DBUG_RETURN(HA_EXIT_FAILURE); } + if (rocksdb_db_options->allow_mmap_writes && + rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { + // NO_LINT_DEBUG + sql_print_error("RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 " + "to use allow_mmap_writes"); + DBUG_RETURN(HA_EXIT_FAILURE); + } + // sst_file_manager will move deleted rocksdb sst files to trash_dir // to be deleted in a background thread. 
std::string trash_dir = std::string(rocksdb_datadir) + "/trash"; - rocksdb_db_options->sst_file_manager.reset( - NewSstFileManager(rocksdb_db_options->env, myrocks_logger, trash_dir)); - - rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond( - rocksdb_sst_mgr_rate_bytes_per_sec); + rocksdb_db_options->sst_file_manager.reset(NewSstFileManager( + rocksdb_db_options->env, myrocks_logger, trash_dir, + rocksdb_sst_mgr_rate_bytes_per_sec, true /* delete_existing_trash */)); std::vector<std::string> cf_names; rocksdb::Status status; @@ -4264,9 +4563,15 @@ static int rocksdb_init_func(void *const p) { if (rocksdb_persistent_cache_size_mb > 0) { std::shared_ptr<rocksdb::PersistentCache> pcache; uint64_t cache_size_bytes= rocksdb_persistent_cache_size_mb * 1024 * 1024; - rocksdb::NewPersistentCache( + status = rocksdb::NewPersistentCache( rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path), cache_size_bytes, myrocks_logger, true, &pcache); + if (!status.ok()) { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Persistent cache returned error: (%s)", + status.getState()); + DBUG_RETURN(HA_EXIT_FAILURE); + } rocksdb_tbl_options->persistent_cache = pcache; } else if (strlen(rocksdb_persistent_cache_path)) { sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb"); @@ -4643,6 +4948,33 @@ static inline void rocksdb_smart_next(bool seek_backward, } } +#ifndef NDEBUG +// simulate that RocksDB has reported corrupted data +static void dbug_change_status_to_corrupted(rocksdb::Status *status) { + *status = rocksdb::Status::Corruption(); +} +#endif + +// If the iterator is not valid it might be because of EOF but might be due +// to IOError or corruption. The good practice is always check it. 
+// https://github.com/facebook/rocksdb/wiki/Iterator#error-handling +static inline bool is_valid(rocksdb::Iterator *scan_it) { + if (scan_it->Valid()) { + return true; + } else { + rocksdb::Status s = scan_it->status(); + DBUG_EXECUTE_IF("rocksdb_return_status_corrupted", + dbug_change_status_to_corrupted(&s);); + if (s.IsIOError() || s.IsCorruption()) { + if (s.IsCorruption()) { + rdb_persist_corruption_marker(); + } + rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); + } + return false; + } +} + /** @brief Example of simple lock controls. The "table_handler" it creates is a @@ -4719,28 +5051,124 @@ std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const { return names; } +/* + Inspired by innobase_get_int_col_max_value from InnoDB. This returns the + maximum value a type can take on. +*/ +static ulonglong rdb_get_int_col_max_value(const Field *field) { + ulonglong max_value = 0; + switch (field->key_type()) { + case HA_KEYTYPE_BINARY: + max_value = 0xFFULL; + break; + case HA_KEYTYPE_INT8: + max_value = 0x7FULL; + break; + case HA_KEYTYPE_USHORT_INT: + max_value = 0xFFFFULL; + break; + case HA_KEYTYPE_SHORT_INT: + max_value = 0x7FFFULL; + break; + case HA_KEYTYPE_UINT24: + max_value = 0xFFFFFFULL; + break; + case HA_KEYTYPE_INT24: + max_value = 0x7FFFFFULL; + break; + case HA_KEYTYPE_ULONG_INT: + max_value = 0xFFFFFFFFULL; + break; + case HA_KEYTYPE_LONG_INT: + max_value = 0x7FFFFFFFULL; + break; + case HA_KEYTYPE_ULONGLONG: + max_value = 0xFFFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_LONGLONG: + max_value = 0x7FFFFFFFFFFFFFFFULL; + break; + default: + abort(); + } + + return max_value; +} + void ha_rocksdb::load_auto_incr_value() { + ulonglong auto_incr = 0; + bool validate_last = false, use_datadic = true; +#ifndef NDEBUG + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;); + validate_last = true; +#endif + + if (use_datadic && dict_manager.get_auto_incr_val( + m_tbl_def->get_autoincr_gl_index_id(), &auto_incr)) { + 
update_auto_incr_val(auto_incr); + } + + // If we find nothing in the data dictionary, or if we are in debug mode, + // then call index_last to get the last value. + // + // This is needed when upgrading from a server that did not support + // persistent auto_increment, of if the table is empty. + // + // For debug mode, we are just verifying that the data dictionary value is + // greater than or equal to the maximum value in the table. + if (auto_incr == 0 || validate_last) { + auto_incr = load_auto_incr_value_from_index(); + update_auto_incr_val(auto_incr); + } + + // If we failed to find anything from the data dictionary and index, then + // initialize auto_increment to 1. + if (m_tbl_def->m_auto_incr_val == 0) { + update_auto_incr_val(1); + } +} + +ulonglong ha_rocksdb::load_auto_incr_value_from_index() { const int save_active_index = active_index; active_index = table->s->next_number_index; const uint8 save_table_status = table->status; + ulonglong last_val = 0; - /* - load_auto_incr_value() may be called by statements that - do not execute implicit commits (i.e. SHOW CREATE TABLE). - index_last() creates a snapshot. When a snapshot is created - here, it has to be released as well. (GitHub issue#189) - */ Rdb_transaction *const tx = get_or_create_tx(table->in_use); const bool is_new_snapshot = !tx->has_snapshot(); + if (is_new_snapshot) { + tx->acquire_snapshot(true); + } // Do a lookup. We only need index column, so it should be index-only. 
- // (another reason to make it index-only is that table->read_set is - // not set appropriately and non-index-only lookup will not read the value) + // (another reason to make it index-only is that table->read_set is not set + // appropriately and non-index-only lookup will not read the value) const bool save_keyread_only = m_keyread_only; m_keyread_only = true; + m_key_requested = true; - if (!index_last(table->record[0])) - update_auto_incr_val(); + if (!index_last(table->record[0])) { + Field *field = + table->key_info[table->s->next_number_index].key_part[0].field; + ulonglong max_val = rdb_get_int_col_max_value(field); + my_bitmap_map *const old_map = + dbug_tmp_use_all_columns(table, table->read_set); + last_val = field->val_int(); + if (last_val != max_val) { + last_val++; + } +#ifndef NDEBUG + ulonglong dd_val; + if (last_val <= max_val) { + const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id(); + if (dict_manager.get_auto_incr_val(gl_index_id, &dd_val) && + tx->get_auto_incr(gl_index_id) == 0) { + DBUG_ASSERT(dd_val >= last_val); + } + } +#endif + dbug_tmp_restore_column_map(table->read_set, old_map); + } m_keyread_only = save_keyread_only; if (is_new_snapshot) { @@ -4753,38 +5181,46 @@ void ha_rocksdb::load_auto_incr_value() { /* Do what ha_rocksdb::index_end() does. (Why don't we use index_init/index_end? class handler defines index_init - as private, for some reason). - */ + as private, for some reason). + */ release_scan_iterator(); + + return last_val; } -/* Get PK value from table->record[0]. */ -/* - TODO(alexyang): No existing support for auto_increment on non-pk columns, see - end of ha_rocksdb::create. 
Also see opened issue here: - https://github.com/facebook/mysql-5.6/issues/153 -*/ -void ha_rocksdb::update_auto_incr_val() { +void ha_rocksdb::update_auto_incr_val(ulonglong val) { + ulonglong auto_incr_val = m_tbl_def->m_auto_incr_val; + while ( + auto_incr_val < val && + !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, val)) { + // Do nothing - just loop until auto_incr_val is >= val or we successfully + // set it + } +} + +void ha_rocksdb::update_auto_incr_val_from_field() { Field *field; - longlong new_val; + ulonglong new_val, max_val; field = table->key_info[table->s->next_number_index].key_part[0].field; + max_val = rdb_get_int_col_max_value(field); my_bitmap_map *const old_map = dbug_tmp_use_all_columns(table, table->read_set); new_val = field->val_int(); // don't increment if we would wrap around - if (new_val != std::numeric_limits<longlong>::max()) { + if (new_val != max_val) { new_val++; } dbug_tmp_restore_column_map(table->read_set, old_map); - longlong auto_incr_val = m_tbl_def->m_auto_incr_val; - while (auto_incr_val < new_val && - !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, - new_val)) { - // Do nothing - just loop until auto_incr_val is >= new_val or - // we successfully set it + // Only update if positive value was set for auto_incr column. + if (new_val <= max_val) { + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + tx->set_auto_incr(m_tbl_def->get_autoincr_gl_index_id(), new_val); + + // Update the in memory auto_incr value in m_tbl_def. + update_auto_incr_val(new_val); } } @@ -4796,12 +5232,12 @@ int ha_rocksdb::load_hidden_pk_value() { Rdb_transaction *const tx = get_or_create_tx(table->in_use); const bool is_new_snapshot = !tx->has_snapshot(); + longlong hidden_pk_id = 1; // Do a lookup. 
if (!index_last(table->record[0])) { /* Decode PK field from the key */ - longlong hidden_pk_id = 0; auto err = read_hidden_pk_id_from_rowkey(&hidden_pk_id); if (err) { if (is_new_snapshot) { @@ -4811,11 +5247,11 @@ int ha_rocksdb::load_hidden_pk_value() { } hidden_pk_id++; - longlong old = m_tbl_def->m_hidden_pk_val; - while ( - old < hidden_pk_id && - !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) { - } + } + + longlong old = m_tbl_def->m_hidden_pk_val; + while (old < hidden_pk_id && + !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) { } if (is_new_snapshot) { @@ -4901,18 +5337,15 @@ ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton, m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr), m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr), m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr), - m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr), - m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), - m_bulk_load_tx(nullptr), m_encoder_arr(nullptr), + m_dup_sk_packed_tuple_old(nullptr), m_eq_cond_lower_bound(nullptr), + m_eq_cond_upper_bound(nullptr), m_pack_buffer(nullptr), + m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), m_encoder_arr(nullptr), m_row_checksums_checked(0), m_in_rpl_delete_rows(false), - m_in_rpl_update_rows(false), m_force_skip_unique_check(false) { - // TODO(alexyang): create a valid PSI_mutex_key for this mutex - mysql_mutex_init(0, &m_bulk_load_mutex, MY_MUTEX_INIT_FAST); -} + m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {} -bool ha_rocksdb::same_table(const ha_rocksdb &other) const { - return m_tbl_def->base_tablename() == other.m_tbl_def->base_tablename(); +const std::string &ha_rocksdb::get_table_basename() const { + return m_tbl_def->base_tablename(); } /** @@ -4998,8 +5431,12 @@ bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd, #ifndef NDEBUG read_filter_ts += rdb_dbug_set_ttl_read_filter_ts(); #endif - return ts + 
kd.m_ttl_duration + read_filter_ts <= - static_cast<uint64>(curr_ts); + bool is_hide_ttl = + ts + kd.m_ttl_duration + read_filter_ts <= static_cast<uint64>(curr_ts); + if (is_hide_ttl) { + update_row_stats(ROWS_FILTERED); + } + return is_hide_ttl; } void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, @@ -5213,12 +5650,12 @@ int ha_rocksdb::convert_record_to_storage_format( Setup which fields will be unpacked when reading rows @detail - Two special cases when we still unpack all fields: + Three special cases when we still unpack all fields: - When this table is being updated (m_lock_rows==RDB_LOCK_WRITE). - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to - read all - fields to find whether there is a row checksum at the end. We could skip - the fields instead of decoding them, but currently we do decoding.) + read all fields to find whether there is a row checksum at the end. We could + skip the fields instead of decoding them, but currently we do decoding.) + - On index merge as bitmap is cleared during that operation @seealso ha_rocksdb::setup_field_converters() @@ -5226,20 +5663,29 @@ int ha_rocksdb::convert_record_to_storage_format( */ void ha_rocksdb::setup_read_decoders() { m_decoders_vect.clear(); + m_key_requested = false; int last_useful = 0; int skip_size = 0; for (uint i = 0; i < table->s->fields; i++) { + // bitmap is cleared on index merge, but it still needs to decode columns + const bool field_requested = + m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || + bitmap_is_clear_all(table->read_set) || + bitmap_is_set(table->read_set, table->field[i]->field_index); + // We only need the decoder if the whole record is stored. 
if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { + // the field potentially needs unpacking + if (field_requested) { + // the field is in the read set + m_key_requested = true; + } continue; } - // bitmap is cleared on index merge, but it still needs to decode columns - if (m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || - bitmap_is_clear_all(table->read_set) || - bitmap_is_set(table->read_set, table->field[i]->field_index)) { + if (field_requested) { // We will need to decode this field m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size}); last_useful = m_decoders_vect.size(); @@ -5265,13 +5711,18 @@ void ha_rocksdb::setup_read_decoders() { } #ifndef NDEBUG -void dbug_append_garbage_at_end(std::string &on_disk_rec) { - on_disk_rec.append("abc"); +void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) { + std::string str(on_disk_rec->data(), on_disk_rec->size()); + on_disk_rec->Reset(); + str.append("abc"); + on_disk_rec->PinSelf(rocksdb::Slice(str)); } -void dbug_truncate_record(std::string &on_disk_rec) { on_disk_rec.resize(0); } +void dbug_truncate_record(rocksdb::PinnableSlice *on_disk_rec) { + on_disk_rec->remove_suffix(on_disk_rec->size()); +} -void dbug_modify_rec_varchar12(std::string &on_disk_rec) { +void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) { std::string res; // The record is NULL-byte followed by VARCHAR(10). 
// Put the NULL-byte @@ -5280,7 +5731,8 @@ void dbug_modify_rec_varchar12(std::string &on_disk_rec) { res.append("\xC", 1); res.append("123456789ab", 12); - on_disk_rec.assign(res); + on_disk_rec->Reset(); + on_disk_rec->PinSelf(rocksdb::Slice(res)); } void dbug_modify_key_varchar8(String &on_disk_rec) { @@ -5302,16 +5754,15 @@ void dbug_create_err_inplace_alter() { int ha_rocksdb::convert_record_from_storage_format( const rocksdb::Slice *const key, uchar *const buf) { + DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1", - dbug_append_garbage_at_end(m_retrieved_record);); + dbug_append_garbage_at_end(&m_retrieved_record);); DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2", - dbug_truncate_record(m_retrieved_record);); + dbug_truncate_record(&m_retrieved_record);); DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read3", - dbug_modify_rec_varchar12(m_retrieved_record);); + dbug_modify_rec_varchar12(&m_retrieved_record);); - const rocksdb::Slice retrieved_rec_slice(&m_retrieved_record.front(), - m_retrieved_record.size()); - return convert_record_from_storage_format(key, &retrieved_rec_slice, buf); + return convert_record_from_storage_format(key, &m_retrieved_record, buf); } int ha_rocksdb::convert_blob_from_storage_format( @@ -5474,9 +5925,13 @@ int ha_rocksdb::convert_record_from_storage_format( Rdb_key_def::get_unpack_header_size(unpack_info[0])); } - int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, - unpack_info ? &unpack_slice : nullptr, - false /* verify_checksum */); + int err = HA_EXIT_SUCCESS; + if (m_key_requested) { + err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, + unpack_info ? 
&unpack_slice : nullptr, + false /* verify_checksum */); + } + if (err != HA_EXIT_SUCCESS) { return err; } @@ -5720,6 +6175,11 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, m_pack_buffer = reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0))); + m_eq_cond_upper_bound = + reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0))); + m_eq_cond_lower_bound = + reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0))); + /* If inplace alter is happening, allocate special buffers for unique secondary index duplicate checking. @@ -5734,6 +6194,7 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, if (m_pk_tuple == nullptr || m_pk_packed_tuple == nullptr || m_sk_packed_tuple == nullptr || m_sk_packed_tuple_old == nullptr || m_end_key_packed_tuple == nullptr || m_pack_buffer == nullptr || + m_eq_cond_upper_bound == nullptr || m_eq_cond_lower_bound == nullptr || (alloc_alter_buffers && (m_dup_sk_packed_tuple == nullptr || m_dup_sk_packed_tuple_old == nullptr))) { // One or more of the above allocations failed. Clean up and exit @@ -5772,6 +6233,12 @@ void ha_rocksdb::free_key_buffers() { my_free(m_dup_sk_packed_tuple_old); m_dup_sk_packed_tuple_old = nullptr; + + my_free(m_eq_cond_upper_bound); + m_eq_cond_upper_bound = nullptr; + + my_free(m_eq_cond_lower_bound); + m_eq_cond_lower_bound = nullptr; } #ifdef MARIAROCKS_NOT_YET @@ -5927,11 +6394,13 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { */ m_verify_row_debug_checksums = false; - /* TODO: move the following to where TABLE_SHARE is opened: */ - if (table->found_next_number_field) + /* Load auto_increment value only once on first use. */ + if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) { load_auto_incr_value(); + } - if (has_hidden_pk(table) && + /* Load hidden pk only once on first use. 
*/ + if (has_hidden_pk(table) && m_tbl_def->m_hidden_pk_val == 0 && (err = load_hidden_pk_value()) != HA_EXIT_SUCCESS) { free_key_buffers(); DBUG_RETURN(err); @@ -6904,6 +7373,20 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)]; + if (create_info->auto_increment_value) { + bool autoinc_upgrade_test = false; + m_tbl_def->m_auto_incr_val = create_info->auto_increment_value; + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;); + if (!autoinc_upgrade_test) { + auto s = dict_manager.put_auto_incr_val( + batch, m_tbl_def->get_autoincr_gl_index_id(), + m_tbl_def->m_auto_incr_val); + if (!s.ok()) { + goto error; + } + } + } + dict_manager.lock(); err = ddl_manager.put_and_write(m_tbl_def, batch); if (err != HA_EXIT_SUCCESS) { @@ -6919,23 +7402,6 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, dict_manager.unlock(); - if (create_info->auto_increment_value) - m_tbl_def->m_auto_incr_val = create_info->auto_increment_value; - - /* - We only support auto_increment at start of the PRIMARY KEY. - */ - // Field *field; - // if ((field= table_arg->next_number_field)) - /* TODO mdcallag: disable this for now to let UNIQUE indexes kind of work - if ((field= table_arg->found_next_number_field)) - { - int pk= table_arg->s->primary_key; - Field *pk_field= table_arg->key_info[pk].key_part[0].field; - if (field->field_index != pk_field->field_index) - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - */ DBUG_RETURN(HA_EXIT_SUCCESS); error: @@ -7033,7 +7499,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd, */ rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice); - while (m_scan_it->Valid()) { + while (is_valid(m_scan_it)) { /* We are using full key and we've hit an exact match, or... @@ -7073,12 +7539,12 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd, from the POV of the current transaction. If it has, try going to the next key. 
*/ - while (m_scan_it->Valid() && kd.has_ttl() && + while (is_valid(m_scan_it) && kd.has_ttl() && should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) { rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it); } - return m_scan_it->Valid() ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND; + return is_valid(m_scan_it) ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND; } int ha_rocksdb::position_to_correct_key( @@ -7238,7 +7704,7 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, bool covered_lookup = m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap); - if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) { + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); if (pk_size == RDB_INVALID_KEY_LEN) { @@ -7339,7 +7805,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { /* Use STATUS_NOT_FOUND when record not found or some error occurred */ table->status = STATUS_NOT_FOUND; - if (m_scan_it->Valid()) { + if (is_valid(m_scan_it)) { rocksdb::Slice key = m_scan_it->key(); /* Check if we've ran out of records of this index */ @@ -7360,8 +7826,7 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { bool covered_lookup = m_keyread_only || m_key_descr_arr[keyno]->covers_lookup( table, &value, &m_lookup_bitmap); - if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && - !has_hidden_pk(table)) { + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { rc = m_key_descr_arr[keyno]->unpack_record( table, buf, &key, &value, m_verify_row_debug_checksums); global_stats.covered_secondary_key_lookups.inc(); @@ -7689,7 +8154,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, while (1) { rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); - if (!m_scan_it->Valid()) { + if (!is_valid(m_scan_it)) { table->status = STATUS_NOT_FOUND; return HA_ERR_END_OF_FILE; } @@ -7953,7 +8418,7 @@ void 
dbug_dump_database(rocksdb::DB *const db) { rocksdb::Status ha_rocksdb::get_for_update( Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value) const { + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const { DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE); const bool exclusive = m_lock_rows != RDB_LOCK_READ; @@ -8039,6 +8504,9 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, &m_retrieved_record); } + DBUG_EXECUTE_IF("rocksdb_return_status_corrupted", + dbug_change_status_to_corrupted(&s);); + if (!s.IsNotFound() && !s.ok()) { DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def, m_table_handler)); @@ -8049,9 +8517,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, if (found) { /* If we found the record, but it's expired, pretend we didn't find it. */ if (!skip_ttl_check && m_pk_descr->has_ttl() && - should_hide_ttl_rec(*m_pk_descr, - rocksdb::Slice(&m_retrieved_record.front(), - m_retrieved_record.size()), + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, tx->m_snapshot_timestamp)) { DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); } @@ -8224,11 +8690,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - if (kd.m_is_reverse_cf) { - kd.get_supremum_key(key, &key_size); - } else { - kd.get_infimum_key(key, &key_size); - } + int key_start_matching_bytes = kd.get_first_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -8239,7 +8701,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. 
it did not exist prior to this) for (;;) { - setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); + setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes); m_scan_it->Seek(index_key); m_skip_scan_it_next_call = true; @@ -8816,23 +9278,21 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id, The bloom filter may need to be disabled for this lookup. */ - const bool total_order_seek = !can_use_bloom_filter( + const bool total_order_seek = !check_bloom_and_set_bounds( ha_thd(), kd, new_slice, all_parts_used); const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache); - /* - psergey-todo: we just need to take lock, lookups not needed: - */ - std::string dummy_value; const rocksdb::Status s = - get_for_update(row_info.tx, kd.get_cf(), new_slice, &dummy_value); + get_for_update(row_info.tx, kd.get_cf(), new_slice, nullptr); if (!s.ok() && !s.IsNotFound()) { return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler); } rocksdb::Iterator *const iter = row_info.tx->get_iterator( - kd.get_cf(), total_order_seek, fill_cache, true /* read current data */, + kd.get_cf(), total_order_seek, fill_cache, + m_eq_cond_lower_bound_slice, m_eq_cond_upper_bound_slice, + true /* read current data */, false /* acquire snapshot */); /* Need to scan the transaction to see if there is a duplicate key. @@ -8875,9 +9335,7 @@ int ha_rocksdb::check_uniqueness_and_lock( m_retrieved_record by check_and_lock_unique_pk(). */ if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() && - should_hide_ttl_rec(*m_pk_descr, - rocksdb::Slice(&m_retrieved_record.front(), - m_retrieved_record.size()), + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, (row_info.tx->m_snapshot_timestamp ? 
row_info.tx->m_snapshot_timestamp : static_cast<int64_t>(std::time(nullptr))))) { @@ -8926,109 +9384,48 @@ int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, const rocksdb::Slice &key, const rocksdb::Slice &value, bool sort) { DBUG_ENTER_FUNC(); - + int res; rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); - DBUG_ASSERT(cf != nullptr); - int res = HA_EXIT_SUCCESS; + // In the case of unsorted inserts, m_sst_info allocated here is not + // used to store the keys. It is still used to indicate when tables + // are switched. + if (m_sst_info == nullptr || m_sst_info->is_committed()) { + m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, + kd.get_name(), cf, *rocksdb_db_options, + THDVAR(ha_thd(), trace_sst_api))); + res = tx->start_bulk_load(this, m_sst_info); + if (res != HA_EXIT_SUCCESS) { + DBUG_RETURN(res); + } + } + DBUG_ASSERT(m_sst_info); if (sort) { - GL_INDEX_ID kd_gl_id = kd.get_gl_index_id(); - auto it = m_key_merge.find(kd_gl_id); - if (it == m_key_merge.end()) { - m_key_merge.emplace( - std::piecewise_construct, std::make_tuple(kd_gl_id), - std::make_tuple( - thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size), - THDVAR(ha_thd(), merge_combine_read_size), - THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf)); - it = m_key_merge.find(kd_gl_id); - if ((res = it->second.init()) != 0) { - DBUG_RETURN(res); - } + Rdb_index_merge *key_merge; + DBUG_ASSERT(cf != nullptr); - if (m_bulk_load_tx == nullptr) { - tx->start_bulk_load(this); - m_bulk_load_tx = tx; - } + res = tx->get_key_merge(kd.get_gl_index_id(), cf, &key_merge); + if (res == HA_EXIT_SUCCESS) { + res = key_merge->add(key, value); } - res = it->second.add(key, value); } else { - if (!m_sst_info) { - m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, - kd.get_name(), cf, *rocksdb_db_options, - THDVAR(ha_thd(), trace_sst_api))); - tx->start_bulk_load(this); - m_bulk_load_tx = tx; - } - - DBUG_ASSERT(m_sst_info); - res = 
m_sst_info->put(key, value); } DBUG_RETURN(res); } -int ha_rocksdb::finalize_bulk_load() { +int ha_rocksdb::finalize_bulk_load(bool print_client_error) { DBUG_ENTER_FUNC(); - DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info, - m_bulk_load_tx != nullptr); - - /* Skip if there are no possible ongoing bulk loads */ - if (m_key_merge.empty() && !m_sst_info && m_bulk_load_tx == nullptr) { - DBUG_RETURN(HA_EXIT_SUCCESS); - } - int res = HA_EXIT_SUCCESS; - RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex); - + /* Skip if there are no possible ongoing bulk loads */ if (m_sst_info) { - res = m_sst_info->commit(); + res = m_sst_info->commit(print_client_error); m_sst_info.reset(); } - - if (!m_key_merge.empty()) { - rocksdb::Slice merge_key; - rocksdb::Slice merge_val; - for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { - const std::string &index_name = - ddl_manager.safe_find(it->first)->get_name(); - Rdb_index_merge &rdb_merge = it->second; - Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name, - rdb_merge.get_cf(), *rocksdb_db_options, - THDVAR(ha_thd(), trace_sst_api)); - - while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) { - if ((res = sst_info.put(merge_key, merge_val)) != 0) { - break; - } - } - // res == -1 => finished ok; res > 0 => error - if (res <= 0) { - if ((res = sst_info.commit()) != 0) { - break; - } - } - } - m_key_merge.clear(); - - /* - Explicitly tell jemalloc to clean up any unused dirty pages at this point. - See https://reviews.facebook.net/D63723 for more details. 
- */ - purge_all_jemalloc_arenas(); - } - - if (m_bulk_load_tx != nullptr) { - m_bulk_load_tx->end_bulk_load(this); - m_bulk_load_tx = nullptr; - } - - RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex); - DBUG_RETURN(res); } @@ -9058,7 +9455,7 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, } if (table->next_number_field) { - update_auto_incr_val(); + update_auto_incr_val_from_field(); } int rc = HA_EXIT_SUCCESS; @@ -9290,6 +9687,41 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, } /* + Setting iterator upper/lower bounds for Seek/SeekForPrev. + This makes RocksDB to avoid scanning tombstones outside of + the given key ranges, when prefix_same_as_start=true was not passed + (when prefix bloom filter can not be used). + Inversing upper/lower bound is necessary on reverse order CF. + This covers HA_READ_PREFIX_LAST* case as well. For example, + if given query eq condition was 12 bytes and condition was + 0x0000b3eb003f65c5e78858b8, and if doing HA_READ_PREFIX_LAST, + eq_cond_len was 11 (see calc_eq_cond_len() for details). + If the index was reverse order, upper bound would be + 0x0000b3eb003f65c5e78857, and lower bound would be + 0x0000b3eb003f65c5e78859. These cover given eq condition range. 
+*/ +void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond) { + uint eq_cond_len = eq_cond.size(); + memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len); + kd.successor(m_eq_cond_upper_bound, eq_cond_len); + memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len); + kd.predecessor(m_eq_cond_lower_bound, eq_cond_len); + + if (kd.m_is_reverse_cf) { + m_eq_cond_upper_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len); + m_eq_cond_lower_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len); + } else { + m_eq_cond_upper_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_upper_bound, eq_cond_len); + m_eq_cond_lower_bound_slice = + rocksdb::Slice((const char *)m_eq_cond_lower_bound, eq_cond_len); + } +} + +/* Open a cursor */ @@ -9305,7 +9737,7 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, bool skip_bloom = true; const rocksdb::Slice eq_cond(slice->data(), eq_cond_len); - if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys)) { + if (check_bloom_and_set_bounds(ha_thd(), kd, eq_cond, use_all_keys)) { skip_bloom = false; } @@ -9345,7 +9777,9 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, read_opts.snapshot = m_scan_it_snapshot; m_scan_it = rdb->NewIterator(read_opts, kd.get_cf()); } else { - m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache); + m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache, + m_eq_cond_lower_bound_slice, + m_eq_cond_upper_bound_slice); } m_scan_it_skips_bloom = skip_bloom; } @@ -9364,14 +9798,12 @@ void ha_rocksdb::release_scan_iterator() { void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; - if (m_pk_descr->m_is_reverse_cf) - m_pk_descr->get_supremum_key(m_pk_packed_tuple, &key_size); - else - m_pk_descr->get_infimum_key(m_pk_packed_tuple, &key_size); + int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); rocksdb::Slice 
table_key((const char *)m_pk_packed_tuple, key_size); - setup_scan_iterator(*m_pk_descr, &table_key); + setup_scan_iterator(*m_pk_descr, &table_key, false, + key_start_matching_bytes); m_scan_it->Seek(table_key); m_skip_scan_it_next_call = true; } @@ -9443,7 +9875,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { #ifdef MARIAROCKS_NOT_YET stats.rows_requested++; #endif - if (!m_scan_it || !m_scan_it->Valid()) { + if (!m_scan_it || !is_valid(m_scan_it)) { /* We can get here when SQL layer has called @@ -9465,7 +9897,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { m_scan_it->Prev(); /* this call cannot fail */ } - if (!m_scan_it->Valid()) { + if (!is_valid(m_scan_it)) { rc = HA_ERR_END_OF_FILE; break; } @@ -9774,6 +10206,10 @@ int ha_rocksdb::info(uint flag) { uint64_t memtableCount; uint64_t memtableSize; + // the stats below are calculated from skiplist wich is a probablistic + // data structure, so the results vary between test runs + // it also can return 0 for quite a large tables which means that + // cardinality for memtable only indxes will be reported as 0 rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r, &memtableCount, &memtableSize); @@ -9807,7 +10243,6 @@ int ha_rocksdb::info(uint flag) { if (flag & HA_STATUS_CONST) { ref_length = m_pk_descr->max_storage_fmt_length(); - // TODO: Needs to reimplement after having real index statistics for (uint i = 0; i < m_tbl_def->m_key_count; i++) { if (is_hidden_pk(i, table, m_tbl_def)) { continue; @@ -10091,20 +10526,6 @@ void ha_rocksdb::read_thd_vars(THD *const thd) { m_checksums_pct = THDVAR(thd, checksums_pct); } -const char *ha_rocksdb::thd_rocksdb_tmpdir() { - const char *tmp_dir = THDVAR(ha_thd(), tmpdir); - - /* - We want to treat an empty string as nullptr, in these cases DDL operations - will use the default --tmpdir passed to mysql instead. 
- */ - if (tmp_dir != nullptr && *tmp_dir == '\0') { - tmp_dir = nullptr; - } - - return (tmp_dir); -} - /** @return @@ -10298,6 +10719,13 @@ ha_rocksdb::get_range(const int &i, return myrocks::get_range(*m_key_descr_arr[i], buf); } +/* + This function is called with total_order_seek=true, but + upper/lower bound setting is not necessary. + Boundary set is useful when there is no matching key, + but in drop_index_thread's case, it means index is marked as removed, + so no further seek will happen for the index id. +*/ static bool is_myrocks_index_empty( rocksdb::ColumnFamilyHandle *cfh, const bool is_reverse_cf, const rocksdb::ReadOptions &read_opts, @@ -10368,7 +10796,7 @@ void Rdb_drop_index_thread::run() { "from cf id %u. MyRocks data dictionary may " "get corrupted.", d.cf_id); - abort_with_stack_traces(); + abort(); } rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id); DBUG_ASSERT(cfh); @@ -10457,6 +10885,7 @@ int ha_rocksdb::delete_table(const char *const tablename) { the persistent data dictionary). 
*/ ddl_manager.remove(tbl, batch, true); + int err = dict_manager.commit(batch); if (err) { DBUG_RETURN(err); @@ -10487,10 +10916,12 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) { const Rdb_key_def &kd = *tbl->m_key_descr_arr[i]; kd.get_infimum_key(reinterpret_cast<uchar *>(key_buf), &key_len); rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); - + const rocksdb::Slice table_key(key_buf, key_len); + setup_iterator_bounds(kd, table_key); + opts.iterate_lower_bound = &m_eq_cond_lower_bound_slice; + opts.iterate_upper_bound = &m_eq_cond_upper_bound_slice; std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(opts, cf)); - const rocksdb::Slice table_key(key_buf, key_len); it->Seek(table_key); while (it->Valid()) { const rocksdb::Slice key = it->key(); @@ -10569,6 +11000,7 @@ int ha_rocksdb::rename_table(const char *const from, const char *const to) { const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); rocksdb::WriteBatch *const batch = wb.get(); dict_manager.lock(); + if (ddl_manager.rename(from_str, to_str, batch)) { rc = HA_ERR_NO_SUCH_TABLE; } else { @@ -10619,7 +11051,7 @@ int ha_rocksdb::extra(enum ha_extra_function operation) { If the table has blobs, then they are part of m_retrieved_record. This call invalidates them. 
*/ - m_retrieved_record.clear(); + m_retrieved_record.Reset(); break; default: break; @@ -10787,24 +11219,21 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd, std::unordered_map<rocksdb::ColumnFamilyHandle *, std::vector<rocksdb::Range>> ranges; std::unordered_set<GL_INDEX_ID> ids_to_check; - std::unordered_map<GL_INDEX_ID, uint> ids_to_keyparts; std::vector<uchar> buf(table_arg->s->keys * 2 * Rdb_key_def::INDEX_NUMBER_SIZE); + std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats; for (uint i = 0; i < table_arg->s->keys; i++) { const auto bufp = &buf[i * 2 * Rdb_key_def::INDEX_NUMBER_SIZE]; const Rdb_key_def &kd = *m_key_descr_arr[i]; + const GL_INDEX_ID index_id = kd.get_gl_index_id(); ranges[kd.get_cf()].push_back(get_range(i, bufp)); - ids_to_check.insert(kd.get_gl_index_id()); - ids_to_keyparts[kd.get_gl_index_id()] = kd.get_key_parts(); - } - // for analyze statements, force flush on memtable to get accurate cardinality - Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); - if (thd != nullptr && THDVAR(thd, flush_memtable_on_analyze) && - !rocksdb_pause_background_work) { - for (auto it : ids_to_check) { - rdb->Flush(rocksdb::FlushOptions(), cf_manager.get_cf(it.cf_id)); - } + ids_to_check.insert(index_id); + // Initialize the stats to 0. If there are no files that contain + // this gl_index_id, then 0 should be stored for the cached stats. + stats[index_id] = Rdb_index_stats(index_id); + DBUG_ASSERT(kd.get_key_parts() > 0); + stats[index_id].m_distinct_keys_per_prefix.resize(kd.get_key_parts()); } // get RocksDB table properties for these ranges @@ -10821,15 +11250,6 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd, } int num_sst = 0; - // group stats per index id - std::unordered_map<GL_INDEX_ID, Rdb_index_stats> stats; - for (const auto &it : ids_to_check) { - // Initialize the stats to 0. If there are no files that contain - // this gl_index_id, then 0 should be stored for the cached stats. 
- stats[it] = Rdb_index_stats(it); - DBUG_ASSERT(ids_to_keyparts.count(it) > 0); - stats[it].m_distinct_keys_per_prefix.resize(ids_to_keyparts[it]); - } for (const auto &it : props) { std::vector<Rdb_index_stats> sst_stats; Rdb_tbl_prop_coll::read_stats_from_tbl_props(it.second, &sst_stats); @@ -10856,6 +11276,53 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd, num_sst++; } + // calculate memtable cardinality + Rdb_tbl_card_coll cardinality_collector(rocksdb_table_stats_sampling_pct); + auto read_opts = rocksdb::ReadOptions(); + read_opts.read_tier = rocksdb::ReadTier::kMemtableTier; + for (uint i = 0; i < table_arg->s->keys; i++) { + const Rdb_key_def &kd = *m_key_descr_arr[i]; + Rdb_index_stats &stat = stats[kd.get_gl_index_id()]; + + uchar r_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + auto r = get_range(i, r_buf); + uint64_t memtableCount; + uint64_t memtableSize; + rdb->GetApproximateMemTableStats(kd.get_cf(), r, &memtableCount, + &memtableSize); + if (memtableCount < (uint64_t)stat.m_rows / 10) { + // skip tables that already have enough stats from SST files to reduce + // overhead and avoid degradation of big tables stats by sampling from + // relatively tiny (less than 10% of full data set) memtable dataset + continue; + } + + std::unique_ptr<rocksdb::Iterator> it = std::unique_ptr<rocksdb::Iterator>( + rdb->NewIterator(read_opts, kd.get_cf())); + + uchar *first_key; + uint key_size; + if (is_pk(i, table, m_tbl_def)) { + first_key = m_pk_packed_tuple; + } else { + first_key = m_sk_packed_tuple; + } + kd.get_first_key(first_key, &key_size); + rocksdb::Slice first_index_key((const char *)first_key, key_size); + + cardinality_collector.Reset(); + for (it->Seek(first_index_key); is_valid(it.get()); it->Next()) { + const rocksdb::Slice key = it->key(); + if (!kd.covers_key(key)) { + break; // end of this index + } + stat.m_rows++; + + cardinality_collector.ProcessKey(key, &kd, &stat); + } + cardinality_collector.AdjustStats(&stat); 
+ } + // set and persist new stats ddl_manager.set_stats(stats); ddl_manager.persist_stats(true); @@ -10903,32 +11370,73 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, off = 1; } - longlong new_val; + Field *field; + ulonglong new_val, max_val; + field = table->key_info[table->s->next_number_index].key_part[0].field; + max_val = rdb_get_int_col_max_value(field); // Local variable reference to simplify code below - std::atomic<longlong> &auto_incr = m_tbl_def->m_auto_incr_val; + auto &auto_incr = m_tbl_def->m_auto_incr_val; - if (inc == 1 && off == 1) { + if (inc == 1) { + DBUG_ASSERT(off == 1); // Optimization for the standard case where we are always simply // incrementing from the last position // Use CAS operation in a loop to make sure automically get the next auto - // increment value while ensuring tha we don't wrap around to a negative + // increment value while ensuring that we don't wrap around to a negative // number. + // + // We set auto_incr to the min of max_val and new_val + 1. This means that + // if we're at the maximum, we should be returning the same value for + // multiple rows, resulting in duplicate key errors (as expected). + // + // If we return values greater than the max, the SQL layer will "truncate" + // the value anyway, but it means that we store invalid values into + // auto_incr that will be visible in SHOW CREATE TABLE. 
new_val = auto_incr; - while (new_val != std::numeric_limits<longlong>::max()) { - if (auto_incr.compare_exchange_weak(new_val, new_val + 1)) { + while (new_val != std::numeric_limits<ulonglong>::max()) { + if (auto_incr.compare_exchange_weak(new_val, + std::min(new_val + 1, max_val))) { break; } } } else { - // The next value can be more complicated if either `inc` or 'off' is not 1 - longlong last_val = auto_incr; + // The next value can be more complicated if either 'inc' or 'off' is not 1 + ulonglong last_val = auto_incr; // Loop until we can correctly update the atomic value do { - if (((last_val - off) / inc) == - (std::numeric_limits<longlong>::max() - off) / inc) { + DBUG_ASSERT(last_val > 0); + // Calculate the next value in the auto increment series: offset + // + N * increment where N is 0, 1, 2, ... + // + // For further information please visit: + // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html + // + // The following is confusing so here is an explanation: + // To get the next number in the sequence above you subtract out the + // offset, calculate the next sequence (N * increment) and then add the + // offset back in. + // + // The additions are rearranged to avoid overflow. The following is + // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact + // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why: + // + // (a+b)/c + // = (a - a%c + a%c + b - b%c + b%c) / c + // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c + // = a/c + b/c + (a%c + b%c) / c + // + // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the + // following statement. + ulonglong n = + (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc; + + // Check if n * inc + off will overflow. This can only happen if we have + // an UNSIGNED BIGINT field. 
+ if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) { + DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max()); // The 'last_val' value is already equal to or larger than the largest // value in the sequence. Continuing would wrap around (technically // the behavior would be undefined). What should we do? @@ -10940,31 +11448,30 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, // may not be in our sequence, but it is guaranteed to be equal // to or larger than any other value already inserted. // - // For now I'm going to take option @2. - new_val = std::numeric_limits<longlong>::max(); + // For now I'm going to take option 2. + // + // Returning ULLONG_MAX from get_auto_increment will cause the SQL + // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to + // the SE API for get_auto_increment, inserts will fail with + // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but + // inserts will fail with ER_DUP_ENTRY for other types (or no failure + // if the column is in a non-unique SK). + new_val = std::numeric_limits<ulonglong>::max(); auto_incr = new_val; // Store the largest value into auto_incr break; } - // Calculate the next value in the auto increment series: - // offset + N * increment - // where N is 0, 1, 2, ... - // - // For further information please visit: - // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html - // - // The following is confusing so here is an explanation: - // To get the next number in the sequence above you subtract out - // the offset, calculate the next sequence (N * increment) and then add - // the offset back in. - new_val = (((last_val - off) + (inc - 1)) / inc) * inc + off; + new_val = n * inc + off; // Attempt to store the new value (plus 1 since m_auto_incr_val contains // the next available value) into the atomic value. 
If the current // value no longer matches what we have in 'last_val' this will fail and // we will repeat the loop (`last_val` will automatically get updated // with the current value). - } while (!auto_incr.compare_exchange_weak(last_val, new_val + 1)); + // + // See above explanation for inc == 1 for why we use std::min. + } while (!auto_incr.compare_exchange_weak(last_val, + std::min(new_val + 1, max_val))); } *first_value = new_val; @@ -11041,7 +11548,8 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | my_core::Alter_inplace_info::ADD_INDEX | my_core::Alter_inplace_info::ALTER_PARTITIONED | - my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) { + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX | + my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) { DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); } @@ -11052,6 +11560,13 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); } + /* We only support changing auto_increment for table options. 
*/ + if ((ha_alter_info->handler_flags & + my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) && + !(ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) { + DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); + } + DBUG_RETURN(my_core::HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE); } @@ -11091,111 +11606,121 @@ bool ha_rocksdb::prepare_inplace_alter_table( DBUG_ASSERT(altered_table != nullptr); DBUG_ASSERT(ha_alter_info != nullptr); - const uint old_n_keys = m_tbl_def->m_key_count; + Rdb_tbl_def *new_tdef = nullptr; + std::shared_ptr<Rdb_key_def> *old_key_descr = nullptr; + std::shared_ptr<Rdb_key_def> *new_key_descr = nullptr; + uint old_n_keys = m_tbl_def->m_key_count; uint new_n_keys = altered_table->s->keys; + std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes; + std::unordered_set<GL_INDEX_ID> dropped_index_ids; + uint n_dropped_keys = 0; + uint n_added_keys = 0; + ulonglong max_auto_incr = 0; - if (has_hidden_pk(altered_table)) { - new_n_keys += 1; - } + if (ha_alter_info->handler_flags & + (my_core::Alter_inplace_info::DROP_INDEX | + my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | + my_core::Alter_inplace_info::ADD_INDEX | + my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) { - const TABLE *const old_table = table; - std::shared_ptr<Rdb_key_def> *const old_key_descr = - m_tbl_def->m_key_descr_arr; - std::shared_ptr<Rdb_key_def> *const new_key_descr = - new std::shared_ptr<Rdb_key_def>[new_n_keys]; + if (has_hidden_pk(altered_table)) { + new_n_keys += 1; + } - Rdb_tbl_def *const new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename()); - new_tdef->m_key_descr_arr = new_key_descr; - new_tdef->m_key_count = new_n_keys; - new_tdef->m_auto_incr_val = - m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed); - new_tdef->m_hidden_pk_val = - m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed); + const TABLE *const old_table = table; + old_key_descr = m_tbl_def->m_key_descr_arr; + new_key_descr = new 
std::shared_ptr<Rdb_key_def>[new_n_keys]; - if (ha_alter_info->handler_flags & - (my_core::Alter_inplace_info::DROP_INDEX | - my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | - my_core::Alter_inplace_info::ADD_INDEX | - my_core::Alter_inplace_info::ADD_UNIQUE_INDEX) && - create_key_defs(altered_table, new_tdef, table, m_tbl_def)) { - /* Delete the new key descriptors */ - delete[] new_key_descr; + new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename()); + new_tdef->m_key_descr_arr = new_key_descr; + new_tdef->m_key_count = new_n_keys; + new_tdef->m_auto_incr_val = + m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed); + new_tdef->m_hidden_pk_val = + m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed); - /* - Explicitly mark as nullptr so we don't accidentally remove entries - from data dictionary on cleanup (or cause double delete[]). - */ - new_tdef->m_key_descr_arr = nullptr; - delete new_tdef; + if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) { + /* Delete the new key descriptors */ + delete[] new_key_descr; - my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0)); - DBUG_RETURN(HA_EXIT_FAILURE); - } + /* + Explicitly mark as nullptr so we don't accidentally remove entries + from data dictionary on cleanup (or cause double delete[]). 
+ */ + new_tdef->m_key_descr_arr = nullptr; + delete new_tdef; - std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes; - std::unordered_set<GL_INDEX_ID> dropped_index_ids; + my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); + } - uint i; - uint j; + uint i; + uint j; - /* Determine which(if any) key definition(s) need to be dropped */ - for (i = 0; i < ha_alter_info->index_drop_count; i++) { - const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i]; - for (j = 0; j < old_n_keys; j++) { - const KEY *const old_key = - &old_table->key_info[old_key_descr[j]->get_keyno()]; + /* Determine which(if any) key definition(s) need to be dropped */ + for (i = 0; i < ha_alter_info->index_drop_count; i++) { + const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i]; + for (j = 0; j < old_n_keys; j++) { + const KEY *const old_key = + &old_table->key_info[old_key_descr[j]->get_keyno()]; - if (!compare_keys(old_key, dropped_key)) { - dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id()); - break; + if (!compare_keys(old_key, dropped_key)) { + dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id()); + break; + } } } - } - - /* Determine which(if any) key definitions(s) need to be added */ - int identical_indexes_found = 0; - for (i = 0; i < ha_alter_info->index_add_count; i++) { - const KEY *const added_key = - &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]]; - for (j = 0; j < new_n_keys; j++) { - const KEY *const new_key = - &altered_table->key_info[new_key_descr[j]->get_keyno()]; - if (!compare_keys(new_key, added_key)) { - /* - Check for cases where an 'identical' index is being dropped and - re-added in a single ALTER statement. Turn this into a no-op as the - index has not changed. - E.G. 
Unique index -> non-unique index requires no change + /* Determine which(if any) key definitions(s) need to be added */ + int identical_indexes_found = 0; + for (i = 0; i < ha_alter_info->index_add_count; i++) { + const KEY *const added_key = + &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]]; + for (j = 0; j < new_n_keys; j++) { + const KEY *const new_key = + &altered_table->key_info[new_key_descr[j]->get_keyno()]; + if (!compare_keys(new_key, added_key)) { + /* + Check for cases where an 'identical' index is being dropped and + re-added in a single ALTER statement. Turn this into a no-op as the + index has not changed. + + E.G. Unique index -> non-unique index requires no change + + Note that cases where the index name remains the same but the + key-parts are changed is already handled in create_inplace_key_defs. + In these cases the index needs to be rebuilt. + */ + if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) { + dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id()); + identical_indexes_found++; + } else { + added_indexes.insert(new_key_descr[j]); + } - Note that cases where the index name remains the same but the - key-parts are changed is already handled in create_inplace_key_defs. - In these cases the index needs to be rebuilt. 
- */ - if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) { - dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id()); - identical_indexes_found++; - } else { - added_indexes.insert(new_key_descr[j]); + break; } - - break; } } - } - const uint n_dropped_keys = - ha_alter_info->index_drop_count - identical_indexes_found; - const uint n_added_keys = - ha_alter_info->index_add_count - identical_indexes_found; - DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys); - DBUG_ASSERT(added_indexes.size() == n_added_keys); - DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys)); + n_dropped_keys = ha_alter_info->index_drop_count - identical_indexes_found; + n_added_keys = ha_alter_info->index_add_count - identical_indexes_found; + DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys); + DBUG_ASSERT(added_indexes.size() == n_added_keys); + DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys)); + } + if (ha_alter_info->handler_flags & + my_core::Alter_inplace_info::CHANGE_CREATE_OPTION) { + if (!new_tdef) { + new_tdef = m_tbl_def; + } + max_auto_incr = load_auto_incr_value_from_index(); + } ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx( new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys, - added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys); - + added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys, + max_auto_incr); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -11349,9 +11874,10 @@ int ha_rocksdb::inplace_populate_sk( bool is_unique_index = new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME; - Rdb_index_merge rdb_merge( - thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size, - rdb_merge_tmp_file_removal_delay, index->get_cf()); + Rdb_index_merge rdb_merge(tx->get_rocksdb_tmpdir(), rdb_merge_buf_size, + rdb_merge_combine_read_size, + rdb_merge_tmp_file_removal_delay, + index->get_cf()); if ((res = rdb_merge.init())) { DBUG_RETURN(res); @@ -11655,6 +12181,29 
@@ bool ha_rocksdb::commit_inplace_alter_table( rdb_drop_idx_thread.signal(); } + if (ha_alter_info->handler_flags & + (my_core::Alter_inplace_info::CHANGE_CREATE_OPTION)) { + const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); + std::unordered_set<GL_INDEX_ID> create_index_ids; + + ulonglong auto_incr_val = ha_alter_info->create_info->auto_increment_value; + + for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) { + Rdb_inplace_alter_ctx *const ctx = + static_cast<Rdb_inplace_alter_ctx *>(*pctx); + auto_incr_val = std::max(auto_incr_val, ctx->m_max_auto_incr); + dict_manager.put_auto_incr_val( + batch, ctx->m_new_tdef->get_autoincr_gl_index_id(), auto_incr_val, + true /* overwrite */); + ctx->m_new_tdef->m_auto_incr_val = auto_incr_val; + } + + if (dict_manager.commit(batch)) { + DBUG_ASSERT(0); + } + } + DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -11682,15 +12231,29 @@ struct rocksdb_status_counters_t { uint64_t block_cache_miss; uint64_t block_cache_hit; uint64_t block_cache_add; + uint64_t block_cache_add_failures; uint64_t block_cache_index_miss; uint64_t block_cache_index_hit; + uint64_t block_cache_index_add; + uint64_t block_cache_index_bytes_insert; + uint64_t block_cache_index_bytes_evict; uint64_t block_cache_filter_miss; uint64_t block_cache_filter_hit; + uint64_t block_cache_filter_add; + uint64_t block_cache_filter_bytes_insert; + uint64_t block_cache_filter_bytes_evict; + uint64_t block_cache_bytes_read; + uint64_t block_cache_bytes_write; + uint64_t block_cache_data_bytes_insert; uint64_t block_cache_data_miss; uint64_t block_cache_data_hit; + uint64_t block_cache_data_add; uint64_t bloom_filter_useful; uint64_t memtable_hit; uint64_t memtable_miss; + uint64_t get_hit_l0; + uint64_t get_hit_l1; + uint64_t get_hit_l2_and_up; uint64_t compaction_key_drop_new; uint64_t compaction_key_drop_obsolete; uint64_t compaction_key_drop_user; @@ -11699,11 +12262,17 @@ struct 
rocksdb_status_counters_t { uint64_t number_keys_updated; uint64_t bytes_written; uint64_t bytes_read; + uint64_t number_db_seek; + uint64_t number_db_seek_found; + uint64_t number_db_next; + uint64_t number_db_next_found; + uint64_t number_db_prev; + uint64_t number_db_prev_found; + uint64_t iter_bytes_read; uint64_t no_file_closes; uint64_t no_file_opens; uint64_t no_file_errors; uint64_t stall_micros; - uint64_t rate_limit_delay_millis; uint64_t num_iterators; uint64_t number_multiget_get; uint64_t number_multiget_keys_read; @@ -11736,15 +12305,29 @@ static rocksdb_status_counters_t rocksdb_status_counters; DEF_SHOW_FUNC(block_cache_miss, BLOCK_CACHE_MISS) DEF_SHOW_FUNC(block_cache_hit, BLOCK_CACHE_HIT) DEF_SHOW_FUNC(block_cache_add, BLOCK_CACHE_ADD) +DEF_SHOW_FUNC(block_cache_add_failures, BLOCK_CACHE_ADD_FAILURES) DEF_SHOW_FUNC(block_cache_index_miss, BLOCK_CACHE_INDEX_MISS) DEF_SHOW_FUNC(block_cache_index_hit, BLOCK_CACHE_INDEX_HIT) +DEF_SHOW_FUNC(block_cache_index_add, BLOCK_CACHE_INDEX_ADD) +DEF_SHOW_FUNC(block_cache_index_bytes_insert, BLOCK_CACHE_INDEX_BYTES_INSERT) +DEF_SHOW_FUNC(block_cache_index_bytes_evict, BLOCK_CACHE_INDEX_BYTES_EVICT) DEF_SHOW_FUNC(block_cache_filter_miss, BLOCK_CACHE_FILTER_MISS) DEF_SHOW_FUNC(block_cache_filter_hit, BLOCK_CACHE_FILTER_HIT) +DEF_SHOW_FUNC(block_cache_filter_add, BLOCK_CACHE_FILTER_ADD) +DEF_SHOW_FUNC(block_cache_filter_bytes_insert, BLOCK_CACHE_FILTER_BYTES_INSERT) +DEF_SHOW_FUNC(block_cache_filter_bytes_evict, BLOCK_CACHE_FILTER_BYTES_EVICT) +DEF_SHOW_FUNC(block_cache_bytes_read, BLOCK_CACHE_BYTES_READ) +DEF_SHOW_FUNC(block_cache_bytes_write, BLOCK_CACHE_BYTES_WRITE) +DEF_SHOW_FUNC(block_cache_data_bytes_insert, BLOCK_CACHE_DATA_BYTES_INSERT) DEF_SHOW_FUNC(block_cache_data_miss, BLOCK_CACHE_DATA_MISS) DEF_SHOW_FUNC(block_cache_data_hit, BLOCK_CACHE_DATA_HIT) +DEF_SHOW_FUNC(block_cache_data_add, BLOCK_CACHE_DATA_ADD) DEF_SHOW_FUNC(bloom_filter_useful, BLOOM_FILTER_USEFUL) DEF_SHOW_FUNC(memtable_hit, MEMTABLE_HIT) 
DEF_SHOW_FUNC(memtable_miss, MEMTABLE_MISS) +DEF_SHOW_FUNC(get_hit_l0, GET_HIT_L0) +DEF_SHOW_FUNC(get_hit_l1, GET_HIT_L1) +DEF_SHOW_FUNC(get_hit_l2_and_up, GET_HIT_L2_AND_UP) DEF_SHOW_FUNC(compaction_key_drop_new, COMPACTION_KEY_DROP_NEWER_ENTRY) DEF_SHOW_FUNC(compaction_key_drop_obsolete, COMPACTION_KEY_DROP_OBSOLETE) DEF_SHOW_FUNC(compaction_key_drop_user, COMPACTION_KEY_DROP_USER) @@ -11753,11 +12336,17 @@ DEF_SHOW_FUNC(number_keys_read, NUMBER_KEYS_READ) DEF_SHOW_FUNC(number_keys_updated, NUMBER_KEYS_UPDATED) DEF_SHOW_FUNC(bytes_written, BYTES_WRITTEN) DEF_SHOW_FUNC(bytes_read, BYTES_READ) +DEF_SHOW_FUNC(number_db_seek, NUMBER_DB_SEEK) +DEF_SHOW_FUNC(number_db_seek_found, NUMBER_DB_SEEK_FOUND) +DEF_SHOW_FUNC(number_db_next, NUMBER_DB_NEXT) +DEF_SHOW_FUNC(number_db_next_found, NUMBER_DB_NEXT_FOUND) +DEF_SHOW_FUNC(number_db_prev, NUMBER_DB_PREV) +DEF_SHOW_FUNC(number_db_prev_found, NUMBER_DB_PREV_FOUND) +DEF_SHOW_FUNC(iter_bytes_read, ITER_BYTES_READ) DEF_SHOW_FUNC(no_file_closes, NO_FILE_CLOSES) DEF_SHOW_FUNC(no_file_opens, NO_FILE_OPENS) DEF_SHOW_FUNC(no_file_errors, NO_FILE_ERRORS) DEF_SHOW_FUNC(stall_micros, STALL_MICROS) -DEF_SHOW_FUNC(rate_limit_delay_millis, RATE_LIMIT_DELAY_MILLIS) DEF_SHOW_FUNC(num_iterators, NO_ITERATORS) DEF_SHOW_FUNC(number_multiget_get, NUMBER_MULTIGET_CALLS) DEF_SHOW_FUNC(number_multiget_keys_read, NUMBER_MULTIGET_KEYS_READ) @@ -11791,6 +12380,7 @@ static void myrocks_update_status() { export_stats.rows_updated = global_stats.rows[ROWS_UPDATED]; export_stats.rows_deleted_blind = global_stats.rows[ROWS_DELETED_BLIND]; export_stats.rows_expired = global_stats.rows[ROWS_EXPIRED]; + export_stats.rows_filtered = global_stats.rows[ROWS_FILTERED]; export_stats.system_rows_deleted = global_stats.system_rows[ROWS_DELETED]; export_stats.system_rows_inserted = global_stats.system_rows[ROWS_INSERTED]; @@ -11829,6 +12419,8 @@ static SHOW_VAR myrocks_status_variables[] = { SHOW_LONGLONG), DEF_STATUS_VAR_FUNC("rows_expired", 
&export_stats.rows_expired, SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("rows_filtered", &export_stats.rows_filtered, + SHOW_LONGLONG), DEF_STATUS_VAR_FUNC("system_rows_deleted", &export_stats.system_rows_deleted, SHOW_LONGLONG), DEF_STATUS_VAR_FUNC("system_rows_inserted", @@ -11947,15 +12539,29 @@ static SHOW_VAR rocksdb_status_vars[] = { DEF_STATUS_VAR(block_cache_miss), DEF_STATUS_VAR(block_cache_hit), DEF_STATUS_VAR(block_cache_add), + DEF_STATUS_VAR(block_cache_add_failures), DEF_STATUS_VAR(block_cache_index_miss), DEF_STATUS_VAR(block_cache_index_hit), + DEF_STATUS_VAR(block_cache_index_add), + DEF_STATUS_VAR(block_cache_index_bytes_insert), + DEF_STATUS_VAR(block_cache_index_bytes_evict), DEF_STATUS_VAR(block_cache_filter_miss), DEF_STATUS_VAR(block_cache_filter_hit), + DEF_STATUS_VAR(block_cache_filter_add), + DEF_STATUS_VAR(block_cache_filter_bytes_insert), + DEF_STATUS_VAR(block_cache_filter_bytes_evict), + DEF_STATUS_VAR(block_cache_bytes_read), + DEF_STATUS_VAR(block_cache_bytes_write), + DEF_STATUS_VAR(block_cache_data_bytes_insert), DEF_STATUS_VAR(block_cache_data_miss), DEF_STATUS_VAR(block_cache_data_hit), + DEF_STATUS_VAR(block_cache_data_add), DEF_STATUS_VAR(bloom_filter_useful), DEF_STATUS_VAR(memtable_hit), DEF_STATUS_VAR(memtable_miss), + DEF_STATUS_VAR(get_hit_l0), + DEF_STATUS_VAR(get_hit_l1), + DEF_STATUS_VAR(get_hit_l2_and_up), DEF_STATUS_VAR(compaction_key_drop_new), DEF_STATUS_VAR(compaction_key_drop_obsolete), DEF_STATUS_VAR(compaction_key_drop_user), @@ -11964,11 +12570,17 @@ static SHOW_VAR rocksdb_status_vars[] = { DEF_STATUS_VAR(number_keys_updated), DEF_STATUS_VAR(bytes_written), DEF_STATUS_VAR(bytes_read), + DEF_STATUS_VAR(number_db_seek), + DEF_STATUS_VAR(number_db_seek_found), + DEF_STATUS_VAR(number_db_next), + DEF_STATUS_VAR(number_db_next_found), + DEF_STATUS_VAR(number_db_prev), + DEF_STATUS_VAR(number_db_prev_found), + DEF_STATUS_VAR(iter_bytes_read), DEF_STATUS_VAR(no_file_closes), DEF_STATUS_VAR(no_file_opens), 
DEF_STATUS_VAR(no_file_errors), DEF_STATUS_VAR(stall_micros), - DEF_STATUS_VAR(rate_limit_delay_millis), DEF_STATUS_VAR(num_iterators), DEF_STATUS_VAR(number_multiget_get), DEF_STATUS_VAR(number_multiget_keys_read), @@ -11994,12 +12606,14 @@ static SHOW_VAR rocksdb_status_vars[] = { DEF_STATUS_VAR(number_superversion_releases), DEF_STATUS_VAR(number_superversion_cleanups), DEF_STATUS_VAR(number_block_not_compressed), + DEF_STATUS_VAR_PTR("row_lock_deadlocks", &rocksdb_row_lock_deadlocks, + SHOW_LONGLONG), + DEF_STATUS_VAR_PTR("row_lock_wait_timeouts", + &rocksdb_row_lock_wait_timeouts, SHOW_LONGLONG), DEF_STATUS_VAR_PTR("snapshot_conflict_errors", &rocksdb_snapshot_conflict_errors, SHOW_LONGLONG), DEF_STATUS_VAR_PTR("wal_group_syncs", &rocksdb_wal_group_syncs, SHOW_LONGLONG), - DEF_STATUS_VAR_PTR("number_stat_computes", &rocksdb_number_stat_computes, - SHOW_LONGLONG), DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put, SHOW_LONGLONG), DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete, @@ -12066,8 +12680,8 @@ void Rdb_background_thread::run() { // InnoDB's behavior. For mode never, the wal file isn't even written, // whereas background writes to the wal file, but issues the syncs in a // background thread. 
- if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) { - DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes); + if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) && + !rocksdb_db_options->allow_mmap_writes) { const rocksdb::Status s = rdb->FlushWAL(true); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD); @@ -12079,6 +12693,16 @@ void Rdb_background_thread::run() { ddl_manager.persist_stats(); } +bool ha_rocksdb::check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys) { + bool can_use_bloom = can_use_bloom_filter(thd, kd, eq_cond, use_all_keys); + if (!can_use_bloom) { + setup_iterator_bounds(kd, eq_cond); + } + return can_use_bloom; +} + /** Deciding if it is possible to use bloom filter or not. @@ -12097,9 +12721,9 @@ void Rdb_background_thread::run() { @param use_all_keys True if all key parts are set with equal conditions. This is aware of extended keys. */ -bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, - const rocksdb::Slice &eq_cond, - const bool use_all_keys) { +bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys) { bool can_use = false; if (THDVAR(thd, skip_bloom_filter_on_read)) { @@ -12237,7 +12861,7 @@ void rdb_handle_io_error(const rocksdb::Status status, rdb_log_status_error(status, "failed to write to WAL"); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting on WAL write error."); - abort_with_stack_traces(); + abort(); break; } case RDB_IO_ERROR_BG_THREAD: { @@ -12248,7 +12872,7 @@ void rdb_handle_io_error(const rocksdb::Status status, rdb_log_status_error(status, "failed on I/O"); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting on I/O error."); - abort_with_stack_traces(); + abort(); break; } default: @@ -12257,16 +12881,17 @@ void rdb_handle_io_error(const rocksdb::Status status, } } else if (status.IsCorruption()) { rdb_log_status_error(status, 
"data corruption detected!"); + rdb_persist_corruption_marker(); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting because of data corruption."); - abort_with_stack_traces(); + abort(); } else if (!status.ok()) { switch (err_type) { case RDB_IO_ERROR_DICT_COMMIT: { rdb_log_status_error(status, "Failed to write to WAL (dictionary)"); /* NO_LINT_DEBUG */ sql_print_error("MyRocks: aborting on WAL write error."); - abort_with_stack_traces(); + abort(); break; } default: @@ -12423,11 +13048,43 @@ void rocksdb_set_collation_exception_list(THD *const thd, *static_cast<const char**>(var_ptr) = val_copy; } -void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var - MY_ATTRIBUTE((__unused__)), - void *const var_ptr, const void *const save) { - Rdb_transaction *&tx = get_tx_from_thd(thd); +int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) { + int new_value_type = value->value_type(value); + if (new_value_type == MYSQL_VALUE_TYPE_STRING) { + char buf[16]; + int len = sizeof(buf); + const char *str = value->val_str(value, buf, &len); + if (str && (my_strcasecmp(system_charset_info, "true", str) == 0 || + my_strcasecmp(system_charset_info, "on", str) == 0)) { + *return_value = TRUE; + } else if (str && (my_strcasecmp(system_charset_info, "false", str) == 0 || + my_strcasecmp(system_charset_info, "off", str) == 0)) { + *return_value = FALSE; + } else { + return 1; + } + } else if (new_value_type == MYSQL_VALUE_TYPE_INT) { + long long intbuf; + value->val_int(value, &intbuf); + if (intbuf > 1) + return 1; + *return_value = intbuf > 0 ? 
TRUE : FALSE; + } else { + return 1; + } + + return 0; +} + +int rocksdb_check_bulk_load( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value) { + my_bool new_value; + if (mysql_value_to_bool(value, &new_value) != 0) { + return 1; + } + Rdb_transaction *&tx = get_tx_from_thd(thd); if (tx != nullptr) { const int rc = tx->finish_bulk_load(); if (rc != 0) { @@ -12435,30 +13092,32 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var sql_print_error("RocksDB: Error %d finalizing last SST file while " "setting bulk loading variable", rc); - /* - MariaDB doesn't do the following: - abort_with_stack_traces(); - because it doesn't seem a good idea to crash a server when a user makes - a mistake. - Instead, we return an error to the user. The error has already been - produced inside ha_rocksdb::finalize_bulk_load(). - */ + THDVAR(thd, bulk_load) = 0; + return 1; } } - *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save); + *static_cast<bool *>(save) = new_value; + return 0; } -void rocksdb_set_bulk_load_allow_unsorted( - THD *const thd, - struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), - void *const var_ptr, const void *const save) { +int rocksdb_check_bulk_load_allow_unsorted( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value) { + my_bool new_value; + if (mysql_value_to_bool(value, &new_value) != 0) { + return 1; + } + if (THDVAR(thd, bulk_load)) { my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET", "Cannot change this setting while bulk load is enabled"); - } else { - *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save); + + return 1; } + + *static_cast<bool *>(save) = new_value; + return 0; } static void rocksdb_set_max_background_jobs(THD *thd, @@ -12489,35 +13148,116 @@ static void rocksdb_set_max_background_jobs(THD *thd, RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); } 
-void rocksdb_set_update_cf_options(THD *const /* unused */, - struct st_mysql_sys_var *const /* unused */, - void *const var_ptr, - const void *const save) { +static void rocksdb_set_bytes_per_sync( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rocksdb_db_options != nullptr); + DBUG_ASSERT(rocksdb_db_options->env != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const ulonglong new_val = *static_cast<const ulonglong *>(save); + + if (rocksdb_db_options->bytes_per_sync != new_val) { + rocksdb_db_options->bytes_per_sync = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"bytes_per_sync", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning("MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static void rocksdb_set_wal_bytes_per_sync( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rocksdb_db_options != nullptr); + DBUG_ASSERT(rocksdb_db_options->env != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const ulonglong new_val = *static_cast<const ulonglong *>(save); + + if (rocksdb_db_options->wal_bytes_per_sync != new_val) { + rocksdb_db_options->wal_bytes_per_sync = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"wal_bytes_per_sync", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning("MyRocks: failed to update max_background_jobs. 
" + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static int +rocksdb_validate_update_cf_options(THD * /* unused */, + struct st_mysql_sys_var * /*unused*/, + void *save, struct st_mysql_value *value) { + + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + int length; + length = sizeof(buff); + str = value->val_str(value, buff, &length); + *(const char **)save = str; + + if (str == nullptr) { + return HA_EXIT_SUCCESS; + } + + Rdb_cf_options::Name_to_config_t option_map; + + // Basic sanity checking and parsing the options into a map. If this fails + // then there's no point to proceed. + if (!Rdb_cf_options::parse_cf_options(str, &option_map)) { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str); + return HA_EXIT_FAILURE; + } + return HA_EXIT_SUCCESS; +} + +static void +rocksdb_set_update_cf_options(THD *const /* unused */, + struct st_mysql_sys_var *const /* unused */, + void *const var_ptr, const void *const save) { const char *const val = *static_cast<const char *const *>(save); + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + if (!val) { - // NO_LINT_DEBUG - sql_print_warning("MyRocks: NULL is not a valid option for updates to " - "column family settings."); + *reinterpret_cast<char **>(var_ptr) = nullptr; + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); return; } - RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); - DBUG_ASSERT(val != nullptr); + // Reset the pointers regardless of how much success we had with updating + // the CF options. This will results in consistent behavior and avoids + // dealing with cases when only a subset of CF-s was successfully updated. + *reinterpret_cast<char **>(var_ptr) = my_strdup(val, MYF(0)); + // Do the real work of applying the changes. Rdb_cf_options::Name_to_config_t option_map; - // Basic sanity checking and parsing the options into a map. If this fails - // then there's no point to proceed. 
+ // This should never fail, because of rocksdb_validate_update_cf_options if (!Rdb_cf_options::parse_cf_options(val, &option_map)) { my_free(*reinterpret_cast<char**>(var_ptr)); - *reinterpret_cast<char**>(var_ptr) = nullptr; - - // NO_LINT_DEBUG - sql_print_warning("MyRocks: failed to parse the updated column family " - "options = '%s'.", val); RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); return; } @@ -12577,16 +13317,6 @@ void rocksdb_set_update_cf_options(THD *const /* unused */, } } - // Reset the pointers regardless of how much success we had with updating - // the CF options. This will results in consistent behavior and avoids - // dealing with cases when only a subset of CF-s was successfully updated. - if (val) { - my_free(*reinterpret_cast<char**>(var_ptr)); - *reinterpret_cast<char**>(var_ptr) = my_strdup(val, MYF(0)); - } else { - *reinterpret_cast<char**>(var_ptr) = nullptr; - } - // Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to // free up resources used before. @@ -12655,6 +13385,12 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) { DBUG_RETURN((rows / 20.0) + 1); } +std::string rdb_corruption_marker_file_name() { + std::string ret(rocksdb_datadir); + ret.append("/ROCKSDB_CORRUPTED"); + return ret; +} + void sql_print_verbose_info(const char *format, ...) 
{ va_list args; @@ -12712,5 +13448,6 @@ maria_declare_plugin(rocksdb_se){ myrocks::rdb_i_s_cfoptions, myrocks::rdb_i_s_compact_stats, myrocks::rdb_i_s_global_info, myrocks::rdb_i_s_ddl, myrocks::rdb_i_s_index_file_map, myrocks::rdb_i_s_lock_info, - myrocks::rdb_i_s_trx_info + myrocks::rdb_i_s_trx_info, + myrocks::rdb_i_s_deadlock_info maria_declare_plugin_end; diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index d929ca15093..0b008a64390 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -41,6 +41,7 @@ /* RocksDB header files */ #include "rocksdb/cache.h" +#include "rocksdb/merge_operator.h" #include "rocksdb/perf_context.h" #include "rocksdb/sst_file_manager.h" #include "rocksdb/statistics.h" @@ -93,6 +94,25 @@ struct Rdb_trx_info { std::vector<Rdb_trx_info> rdb_get_all_trx_info(); /* + * class for exporting deadlock transaction information for + * information_schema.rocksdb_deadlock + */ +struct Rdb_deadlock_info { + struct Rdb_dl_trx_info { + ulonglong trx_id; + std::string cf_name; + std::string waiting_key; + bool exclusive_lock; + std::string index_name; + std::string table_name; + }; + std::vector <Rdb_dl_trx_info> path; + ulonglong victim_trx_id; +}; + +std::vector<Rdb_deadlock_info> rdb_get_deadlock_info(); + +/* This is - the name of the default Column Family (the CF which stores indexes which didn't explicitly specify which CF they are in) @@ -250,6 +270,8 @@ enum collations_used { */ #define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong) +#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong) + /* Maximum index prefix length in bytes. 
*/ @@ -367,6 +389,7 @@ enum operation_type : int { ROWS_UPDATED, ROWS_DELETED_BLIND, ROWS_EXPIRED, + ROWS_FILTERED, ROWS_HIDDEN_NO_SNAPSHOT, ROWS_MAX }; @@ -400,6 +423,7 @@ struct st_export_stats { ulonglong rows_updated; ulonglong rows_deleted_blind; ulonglong rows_expired; + ulonglong rows_filtered; ulonglong rows_hidden_no_snapshot; ulonglong system_rows_deleted; @@ -538,6 +562,12 @@ class ha_rocksdb : public my_core::handler { uchar *m_dup_sk_packed_tuple; uchar *m_dup_sk_packed_tuple_old; + /* Buffers used for passing upper/bound eq conditions. */ + uchar *m_eq_cond_lower_bound; + uchar *m_eq_cond_upper_bound; + rocksdb::Slice m_eq_cond_lower_bound_slice; + rocksdb::Slice m_eq_cond_upper_bound_slice; + /* Temporary space for packing VARCHARs (we provide it to pack_record()/pack_index_tuple() calls). @@ -567,7 +597,7 @@ class ha_rocksdb : public my_core::handler { This is used only when we get the record with rocksdb's Get() call (The other option is when we get a rocksdb::Slice from an iterator) */ - std::string m_retrieved_record; + rocksdb::PinnableSlice m_retrieved_record; /* Type of locking to apply to rows */ enum { RDB_LOCK_NONE, RDB_LOCK_READ, RDB_LOCK_WRITE } m_lock_rows; @@ -600,12 +630,7 @@ class ha_rocksdb : public my_core::handler { bool m_update_scope_is_valid; /* SST information used for bulk loading the primary key */ - std::unique_ptr<Rdb_sst_info> m_sst_info; - /* External merge sorts for bulk load: key ID -> merge sort instance */ - std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge; - Rdb_transaction *m_bulk_load_tx; - /* Mutex to protect finalizing bulk load */ - mysql_mutex_t m_bulk_load_mutex; + std::shared_ptr<Rdb_sst_info> m_sst_info; /* MySQL index number for duplicate key error @@ -620,13 +645,17 @@ class ha_rocksdb : public my_core::handler { int secondary_index_read(const int keyno, uchar *const buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void setup_iterator_for_rnd_scan(); - void setup_scan_iterator(const 
Rdb_key_def &kd, rocksdb::Slice *const slice) - MY_ATTRIBUTE((__nonnull__)) { - setup_scan_iterator(kd, slice, false, 0); - } bool is_ascending(const Rdb_key_def &keydef, enum ha_rkey_function find_flag) const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void setup_iterator_bounds(const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond); + bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys); + bool check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys); void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice, const bool use_all_keys, const uint eq_cond_len) MY_ATTRIBUTE((__nonnull__)); @@ -635,7 +664,8 @@ class ha_rocksdb : public my_core::handler { rocksdb::Status get_for_update(Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family, - const rocksdb::Slice &key, std::string *const value) const; + const rocksdb::Slice &key, + rocksdb::PinnableSlice *value) const; int get_row_by_rowid(uchar *const buf, const char *const rowid, const uint rowid_size, const bool skip_lookup = false, @@ -649,8 +679,13 @@ class ha_rocksdb : public my_core::handler { rowid_size, skip_lookup, skip_ttl_check); } - void update_auto_incr_val(); void load_auto_incr_value(); + ulonglong load_auto_incr_value_from_index(); + void update_auto_incr_val(ulonglong val); + void update_auto_incr_val_from_field(); + rocksdb::Status get_datadic_auto_incr(Rdb_transaction *const tx, + const GL_INDEX_ID &gl_index_id, + ulonglong *new_val) const; longlong update_hidden_pk_val(); int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__)); int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) @@ -696,6 +731,12 @@ class ha_rocksdb : public my_core::handler { */ std::vector<READ_FIELD> m_decoders_vect; + /* + This tells if any field which is part of the key needs to be unpacked and + decoded. 
+ */ + bool m_key_requested = false; + /* Setup field_decoders based on type of scan and table->read_set */ void setup_read_decoders(); @@ -766,9 +807,12 @@ public: my_core::TABLE_SHARE *const table_arg); ~ha_rocksdb() { int err MY_ATTRIBUTE((__unused__)); - err = finalize_bulk_load(); - DBUG_ASSERT(err == 0); - mysql_mutex_destroy(&m_bulk_load_mutex); + err = finalize_bulk_load(false); + if (err != 0) { + sql_print_error("RocksDB: Error %d finalizing bulk load while closing " + "handler.", + err); + } } /** @brief @@ -794,10 +838,9 @@ public: */ /* - See if this is the same base table - this should only be true for different - partitions of the same table. + Returns the name of the table's base name */ - bool same_table(const ha_rocksdb &other) const; + const std::string &get_table_basename() const; /** @brief This is a list of flags that indicate what functionality the storage engine @@ -1210,8 +1253,6 @@ private: Rdb_tbl_def *get_table_if_exists(const char *const tablename) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); void read_thd_vars(THD *const thd) MY_ATTRIBUTE((__nonnull__)); - const char *thd_rocksdb_tmpdir() - MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); bool contains_foreign_key(THD *const thd) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); @@ -1221,6 +1262,9 @@ private: const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int finalize_bulk_load(bool print_client_error = true) + MY_ATTRIBUTE((__warn_unused_result__)); + public: int index_init(uint idx, bool sorted) override MY_ATTRIBUTE((__warn_unused_result__)); @@ -1264,7 +1308,7 @@ public: DBUG_ENTER_FUNC(); /* Free blob data */ - m_retrieved_record.clear(); + m_retrieved_record.Reset(); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -1335,8 +1379,6 @@ public: my_core::Alter_inplace_info *const ha_alter_info, bool commit) override; - int finalize_bulk_load() MY_ATTRIBUTE((__warn_unused_result__)); - #ifdef MARIAROCKS_NOT_YET 
// MDEV-10976 void set_use_read_free_rpl(const char *const whitelist); #endif @@ -1391,18 +1433,22 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx { /* Stores number of keys to drop */ const uint m_n_dropped_keys; + /* Stores the largest current auto increment value in the index */ + const ulonglong m_max_auto_incr; + Rdb_inplace_alter_ctx( Rdb_tbl_def *new_tdef, std::shared_ptr<Rdb_key_def> *old_key_descr, std::shared_ptr<Rdb_key_def> *new_key_descr, uint old_n_keys, uint new_n_keys, std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes, std::unordered_set<GL_INDEX_ID> dropped_index_ids, uint n_added_keys, - uint n_dropped_keys) + uint n_dropped_keys, ulonglong max_auto_incr) : my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef), m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr), m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys), m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids), - m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys) {} + m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys), + m_max_auto_incr(max_auto_incr) {} ~Rdb_inplace_alter_ctx() {} @@ -1412,6 +1458,9 @@ private: Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &); }; +// file name indicating RocksDB data corruption +std::string rdb_corruption_marker_file_name(); + const int MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL= MariaDB_PLUGIN_MATURITY_GAMMA; extern bool prevent_myrocks_loading; diff --git a/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc new file mode 100644 index 00000000000..ba2e7ace0c5 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc @@ -0,0 +1,150 @@ +--echo # +--echo # Testing concurrent transactions. 
+--echo # + +--source include/count_sessions.inc +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); +connect (con3,localhost,root,,); + +connection con1; +begin; +insert into t values (); # 1 + +connection con2; +begin; +insert into t values (); # 2 + +connection con3; +begin; +insert into t values (); # 3 + +connection con1; +insert into t values (); # 4 + +connection con2; +insert into t values (); # 5 + +connection con3; +insert into t values (); # 6 + +connection con2; +commit; + +connection con3; +rollback; + +connection con1; +commit; + +delete from t; + +--echo # Master value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Slave value before restart +sync_slave_with_master; +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +connection slave; +--source include/stop_slave.inc +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc + +connection default; +--echo # Master value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--let $rpl_server_number = 2 +--source include/rpl_restart_server.inc + +connection slave; +--source include/start_slave.inc +--echo # Slave value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +disconnect con1; +disconnect con2; +disconnect con3; +--source include/wait_until_count_sessions.inc + +--echo # +--echo # Testing interaction of merge markers with various DDL statements. +--echo # +connection slave; +--source include/stop_slave.inc + +connection default; + +--echo # Drop and add primary key. 
+alter table t modify i int; +alter table t drop primary key; +alter table t add primary key (i); +alter table t modify i int auto_increment; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Remove auto_increment property. +alter table t modify i int; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Add auto_increment property. +insert into t values (123); +alter table t modify i int auto_increment; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Add column j. +alter table t add column j int; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Rename tables. 
+rename table t to t2; +rename table t2 to t; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Change auto_increment property +alter table t auto_increment = 1000; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t drop primary key, add key (i), auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t add key (j), auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t modify i int; +alter table t add column (k int auto_increment), add key(k), auto_increment=15; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Drop table. 
+drop table t; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc + +connection slave; +--source include/start_slave.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc index 87cb1f70f32..6472b969ce6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc @@ -1,6 +1,4 @@ ---disable_warnings -DROP TABLE IF EXISTS t1, t2, t3; ---enable_warnings +--source include/count_sessions.inc if ($data_order_desc) { @@ -20,7 +18,7 @@ eval CREATE TABLE t1( b CHAR(30), PRIMARY KEY(pk) COMMENT "$pk_cf", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; # Create a second identical table to validate that bulk loading different # tables in the same session works @@ -30,7 +28,7 @@ eval CREATE TABLE t2( b CHAR(30), PRIMARY KEY(pk) COMMENT "$pk_cf", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; # Create a third table using partitions to validate that bulk loading works # across a partitioned table @@ -40,7 +38,7 @@ eval CREATE TABLE t3( b CHAR(30), PRIMARY KEY(pk) COMMENT "$pk_cf", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; --let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` @@ -154,3 +152,5 @@ EOF # Cleanup disconnect other; DROP TABLE t1, t2, t3; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc new file mode 100644 index 00000000000..4a3158e814c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc @@ -0,0 +1,144 @@ +--source include/have_partition.inc +--source include/count_sessions.inc + +SET rocksdb_bulk_load_size=3; +SET rocksdb_bulk_load_allow_unsorted=1; + +### Test individual INSERTs ### 
+ +# A table with only a PK won't have rows until the bulk load is finished +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--disable_query_log +let $sign = 1; +let $max = 5; +let $i = 1; +while ($i <= $max) { + let $a = 1 + $sign * $i; + let $b = 1 - $sign * $i; + let $sign = -$sign; + let $insert = INSERT INTO t1 VALUES ($a, $b); + eval $insert; + inc $i; +} +--enable_query_log +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1; + +# A table with a PK and a SK shows rows immediately +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b)) + ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--disable_query_log +let $sign = 1; +let $max = 5; +let $i = 1; +while ($i <= $max) { + let $a = 1 + $sign * $i; + let $b = 1 - $sign * $i; + let $sign = -$sign; + let $insert = INSERT INTO t1 VALUES ($a, $b); + eval $insert; + inc $i; +} +--enable_query_log + +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1; + +# Inserting into another table finishes bulk load to the previous table +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; + +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1,1); +INSERT INTO t2 VALUES (1,1); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +INSERT INTO t1 VALUES (2,2); +SELECT * FROM t2 FORCE INDEX (PRIMARY); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1, t2; + +### Test bulk load from a file ### +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + 
ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; + +--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` +# Create a text file with data to import into the table. +# PK and SK are not in any order +--let ROCKSDB_INFILE = $file +perl; +my $fn = $ENV{'ROCKSDB_INFILE'}; +open(my $fh, '>', $fn) || die "perl open($fn): $!"; +my $max = 5000000; +my $sign = 1; +for (my $ii = 0; $ii < $max; $ii++) +{ + my $a = 1 + $sign * $ii; + my $b = 1 - $sign * $ii; + $sign = -$sign; + print $fh "$a\t$b\n"; +} +close($fh); +EOF +--file_exists $file + +# Make sure a snapshot held by another user doesn't block the bulk load +connect (other,localhost,root,,); +set session transaction isolation level repeatable read; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +start transaction with consistent snapshot; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +connection default; +set rocksdb_bulk_load=1; +set rocksdb_bulk_load_size=100000; +--disable_query_log +--echo LOAD DATA INFILE <input_file> INTO TABLE t1; +eval LOAD DATA INFILE '$file' INTO TABLE t1; +--echo LOAD DATA INFILE <input_file> INTO TABLE t2; +eval LOAD DATA INFILE '$file' INTO TABLE t2; +--echo LOAD DATA INFILE <input_file> INTO TABLE t3; +eval LOAD DATA INFILE '$file' INTO TABLE t3; +--enable_query_log +set rocksdb_bulk_load=0; + +--remove_file $file + +# Make sure row count index stats are correct +--replace_column 6 # 7 # 8 # 9 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +ANALYZE TABLE t1, t2, t3; + +--replace_column 6 # 7 # 8 # 9 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +# Make sure all the data is there. 
+select count(a) from t1; +select count(b) from t1; +select count(a) from t2; +select count(b) from t2; +select count(a) from t3; +select count(b) from t3; + +SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3; +SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3; + +disconnect other; +DROP TABLE t1, t2, t3; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc new file mode 100644 index 00000000000..8eef7ed2162 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc @@ -0,0 +1,8 @@ +--source include/shutdown_mysqld.inc + +# Expect the server to fail to come up with these options +--error 1 +--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option + +# Restart the server with the default options +--source include/start_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc new file mode 100644 index 00000000000..73e30b3e46c --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc @@ -0,0 +1,14 @@ +# Include this script only after using shutdown_mysqld.inc +# where $_expect_file_name was initialized. 
+# Write file to make mysql-test-run.pl start up the server again +--exec echo "restart:$_mysqld_option" > $_expect_file_name + +# Turn on reconnect +--enable_reconnect + +# Call script that will poll the server waiting for it to be back online again +--source include/wait_until_connected_again.inc + +# Turn off reconnect again +--disable_reconnect + diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result index 0c3ad720194..18365338d0c 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result @@ -15,6 +15,10 @@ count(b) 300000 ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +ERROR HY000: Status error 10 received from RocksDB: Operation aborted: Failed to acquire lock due to max_num_locks limit +set session rocksdb_bulk_load=1; +ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +set session rocksdb_bulk_load=0; SELECT COUNT(*) as c FROM (SELECT COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `b`, CONCAT(ISNULL(`b`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `t1` FORCE INDEX(`kb`) UNION DISTINCT diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result index f8508febb01..5d947603ec5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result @@ -778,3 +778,20 @@ set global rocksdb_force_flush_memtable_now = true; select * from t1; col1 col2 extra DROP TABLE t1; +create table t1 (i int auto_increment, key(i)) engine=rocksdb; +insert into t1 values(); +insert into t1 values(); +insert into t1 values(); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) 
NOT NULL AUTO_INCREMENT, + KEY `i` (`i`) +) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) NOT NULL AUTO_INCREMENT, + KEY `i` (`i`) +) ENGINE=ROCKSDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1 +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result new file mode 100644 index 00000000000..9b5a335b6f8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result @@ -0,0 +1,38 @@ +# +# Test how MyRocks behaves when RocksDB reports corrupted data. +# +# +# Test server crashes on corrupted data and restarts +# +create table t1 ( +pk int not null primary key, +col1 varchar(10) +) engine=rocksdb; +insert into t1 values (1,1),(2,2),(3,3); +select * from t1 where pk=1; +pk col1 +1 1 +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; +select * from t1 where pk=1; +ERROR HY000: Lost connection to MySQL server during query +FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err +# +# The same for scan queries +# +select * from t1; +pk col1 +1 1 +2 2 +3 3 +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; +select * from t1; +ERROR HY000: Lost connection to MySQL server during query +FOUND 1 /data corruption detected/ in allow_to_start_after_corruption_debug.err +# +# Test restart failure. The server is shutdown at this point. 
+# +FOUND 1 /The server will exit normally and stop restart attempts/ in allow_to_start_after_corruption_debug.err +# +# Remove corruption file and restart cleanly +# +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result new file mode 100644 index 00000000000..60395eced7e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result @@ -0,0 +1,132 @@ +include/master-slave.inc +[connection master] +create table t (i int primary key auto_increment) engine=rocksdb; +# +# Testing concurrent transactions. +# +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connect con3,localhost,root,,; +connection con1; +begin; +insert into t values (); +connection con2; +begin; +insert into t values (); +connection con3; +begin; +insert into t values (); +connection con1; +insert into t values (); +connection con2; +insert into t values (); +connection con3; +insert into t values (); +connection con2; +commit; +connection con3; +rollback; +connection con1; +commit; +delete from t; +# Master value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 7 +# Slave value before restart +connection slave; +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +connection slave; +include/stop_slave.inc +include/rpl_restart_server.inc [server_number=1] +connection default; +# Master value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +include/rpl_restart_server.inc [server_number=2] +connection slave; +include/start_slave.inc +# Slave value after restart +select table_schema, table_name, auto_increment from 
information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +disconnect con1; +disconnect con2; +disconnect con3; +# +# Testing interaction of merge markers with various DDL statements. +# +connection slave; +include/stop_slave.inc +connection default; +# Drop and add primary key. +alter table t modify i int; +alter table t drop primary key; +alter table t add primary key (i); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +# Remove auto_increment property. +alter table t modify i int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t NULL +# Add auto_increment property. +insert into t values (123); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Add column j. +alter table t add column j int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Rename tables. 
+rename table t to t2; +rename table t2 to t; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Change auto_increment property +alter table t auto_increment = 1000; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 1000 +alter table t auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t drop primary key, add key (i), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t add key (j), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t modify i int; +alter table t add column (k int auto_increment), add key(k), auto_increment=15; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 16 +# Drop table. 
+drop table t; +include/rpl_restart_server.inc [server_number=1] +connection slave; +include/start_slave.inc +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result new file mode 100644 index 00000000000..c837fb7c77d --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result @@ -0,0 +1,132 @@ +include/master-slave.inc +[connection master] +create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3; +# +# Testing concurrent transactions. +# +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connect con3,localhost,root,,; +connection con1; +begin; +insert into t values (); +connection con2; +begin; +insert into t values (); +connection con3; +begin; +insert into t values (); +connection con1; +insert into t values (); +connection con2; +insert into t values (); +connection con3; +insert into t values (); +connection con2; +commit; +connection con3; +rollback; +connection con1; +commit; +delete from t; +# Master value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 7 +# Slave value before restart +connection slave; +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +connection slave; +include/stop_slave.inc +include/rpl_restart_server.inc [server_number=1] +connection default; +# Master value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +include/rpl_restart_server.inc [server_number=2] +connection slave; +include/start_slave.inc +# Slave value after restart +select table_schema, table_name, auto_increment from 
information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +disconnect con1; +disconnect con2; +disconnect con3; +# +# Testing interaction of merge markers with various DDL statements. +# +connection slave; +include/stop_slave.inc +connection default; +# Drop and add primary key. +alter table t modify i int; +alter table t drop primary key; +alter table t add primary key (i); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 1 +# Remove auto_increment property. +alter table t modify i int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t NULL +# Add auto_increment property. +insert into t values (123); +alter table t modify i int auto_increment; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Add column j. +alter table t add column j int; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Rename tables. 
+rename table t to t2; +rename table t2 to t; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +# Change auto_increment property +alter table t auto_increment = 1000; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 1000 +alter table t auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t drop primary key, add key (i), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t add key (j), auto_increment = 1; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 124 +alter table t modify i int; +alter table t add column (k int auto_increment), add key(k), auto_increment=15; +include/rpl_restart_server.inc [server_number=1] +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 16 +# Drop table. 
+drop table t; +include/rpl_restart_server.inc [server_number=1] +connection slave; +include/start_slave.inc +include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result new file mode 100644 index 00000000000..fe08cd7c361 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result @@ -0,0 +1,107 @@ +# +# Testing upgrading from server without merges for auto_increment +# to new server with such support. +# +set debug_dbug='+d,myrocks_autoinc_upgrade'; +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; +i +1 +2 +3 +delete from t where i > 1; +select * from t; +i +1 +select table_name, index_name, auto_increment +from information_schema.rocksdb_ddl where table_name = 't'; +table_name index_name auto_increment +t PRIMARY NULL +set debug_dbug='-d,myrocks_autoinc_upgrade'; +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; +i +1 +2 +3 +4 +select table_name, index_name, auto_increment +from information_schema.rocksdb_ddl where table_name = 't'; +table_name index_name auto_increment +t PRIMARY 5 +delete from t where i > 1; +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; +i +1 +5 +6 +7 +drop table t; +# +# Testing crash safety of transactions. 
+# +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); +# Before anything +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_before"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 4 +select max(i) from t; +max(i) +3 +# After engine prepare +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_after_prepare"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 4 +select max(i) from t; +max(i) +3 +# After binlog +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_after_log"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 6 +select max(i) from t; +max(i) +5 +# After everything +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_after"; +commit; +ERROR HY000: Lost connection to MySQL server during query +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +table_schema table_name auto_increment +test t 8 +select max(i) from t; +max(i) +7 +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result index 0fb3d96c58f..5da9a7e7e1c 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result @@ -61,3 +61,82 @@ LAST_INSERT_ID() 
SELECT a FROM t1 ORDER BY a; a DROP TABLE t1; +#--------------------------- +# test large autoincrement values +#--------------------------- +SET auto_increment_increment = 1; +SET auto_increment_offset = 1; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'b'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'c'); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +a b +18446744073709551613 a +18446744073709551614 b +DROP TABLE t1; +SET auto_increment_increment = 300; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'b'); +ERROR HY000: Failed to read auto-increment value from storage engine +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'c'); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +a b +18446744073709551613 a +DROP 
TABLE t1; +SET auto_increment_offset = 200; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551614 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'b'); +ERROR HY000: Failed to read auto-increment value from storage engine +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `b` char(8) DEFAULT NULL, + PRIMARY KEY (`a`) +) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615 DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (NULL, 'c'); +ERROR HY000: Failed to read auto-increment value from storage engine +SELECT * FROM t1; +a b +18446744073709551613 a +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result b/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result deleted file mode 100644 index 28b5b6cd070..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/r/autoincrement.result +++ /dev/null @@ -1 +0,0 @@ -# The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE. 
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result new file mode 100644 index 00000000000..4f6702b85a7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result @@ -0,0 +1,62 @@ +# +# Issue #809: Wrong query result with bloom filters +# +create table t1 ( +id1 bigint not null, +id2 bigint not null, +id3 varchar(100) not null, +id4 int not null, +id5 int not null, +value bigint, +value2 varchar(100), +primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t3(seq int); +insert into t3 +select +1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 B, t2 C, t2 D; +insert t1 +select +(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; +set global rocksdb_force_flush_memtable_now=1; +# Full table scan +explain +select * from t1 limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 +select * from t1 limit 10; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +1000 2000 2000 9999 9999 1000 aaabbbccc +1000 2000 2000 9998 9998 1000 aaabbbccc +1000 2000 2000 9997 9997 1000 aaabbbccc +1000 2000 2000 9996 9996 1000 aaabbbccc +1000 1999 1999 9995 9995 1000 aaabbbccc +1000 1999 1999 9994 9994 1000 aaabbbccc +1000 1999 1999 9993 9993 1000 aaabbbccc +1000 1999 1999 9992 9992 1000 aaabbbccc +1000 1999 1999 9991 9991 1000 aaabbbccc +# An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 122 NULL 1 +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +create table t4 ( +pk 
int unsigned not null primary key, +kp1 int unsigned not null, +kp2 int unsigned not null, +col1 int unsigned, +key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); +# This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; +pk kp1 kp2 col1 +drop table t1,t2,t3,t4; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result index b931a61e233..4a746d64c87 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data will be ordered in ascending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result new file mode 100644 index 00000000000..4e79d82810e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result @@ -0,0 +1,11 @@ +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +SET rocksdb_bulk_load_allow_unsorted=1; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1); +connect con1,localhost,root,,; +DROP TABLE t1; +connection 
default; +disconnect con1; +SET rocksdb_bulk_load=0; +SELECT * FROM t1; +ERROR 42S02: Table 'test.t1' doesn't exist diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result index f230b173892..3703c208d0b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result @@ -1,4 +1,4 @@ -CREATE TABLE t1(pk INT, PRIMARY KEY(pk)); +CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; INSERT INTO t1 VALUES(10); INSERT INTO t1 VALUES(11); @@ -14,18 +14,30 @@ INSERT INTO t1 VALUES(1); INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(20); INSERT INTO t1 VALUES(21); -# -# In MyRocks, the following statement will intentionally crash the server. -# In MariaDB, it will cause an error SET rocksdb_bulk_load=0; ERROR HY000: Rows inserted during bulk load must not overlap existing rows -# -# Despite the error, bulk load operation is over so the variable value -# will be 0: -select @@rocksdb_bulk_load; -@@rocksdb_bulk_load -0 +SHOW VARIABLES LIKE 'rocksdb_bulk_load'; +Variable_name Value +rocksdb_bulk_load OFF call mtr.add_suppression('finalizing last SST file while setting bulk loading variable'); +SELECT * FROM t1; +pk +10 +11 +FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable/ in rocksdb.bulk_load_errors.1.err +connect con1,localhost,root,,; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(1); +INSERT INTO t1 VALUES(2); +INSERT INTO t1 VALUES(20); +INSERT INTO t1 VALUES(21); +connection default; +disconnect con1; +SELECT * FROM t1; +pk +10 +11 +FOUND 1 /RocksDB: Error [0-9]+ finalizing last SST file while disconnecting/ in rocksdb.bulk_load_errors.2.err TRUNCATE TABLE t1; SET rocksdb_bulk_load_allow_unsorted=1; SET rocksdb_bulk_load=1; @@ -53,3 +65,35 @@ pk 202 SET rocksdb_bulk_load_allow_unsorted=DEFAULT; DROP TABLE t1; +CREATE TABLE t1(c1 INT KEY) 
ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (),(),(); +ERROR HY000: Rows must be inserted in primary key order during bulk load operation +SET rocksdb_bulk_load=0; +DROP TABLE t1; +SET @orig_table_open_cache=@@global.table_open_cache; +CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(13, 0); +INSERT INTO t1 VALUES(2, 'test 2'); +Warnings: +Warning 1366 Incorrect integer value: 'test 2' for column 'b' at row 1 +INSERT INTO t1 VALUES(@id, @arg04); +SET @@global.table_open_cache=FALSE; +Warnings: +Warning 1292 Truncated incorrect table_open_cache value: '0' +INSERT INTO t1 VALUES(51479+0.333333333,1); +DROP TABLE t1; +SET @@global.table_open_cache=@orig_table_open_cache; +FOUND 1 /RocksDB: Error [0-9]+ finalizing bulk load while closing handler/ in rocksdb.bulk_load_errors.3.err +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1), (2); +INSERT INTO t2 VALUES (1), (2); +INSERT INTO t1 VALUES (1); +INSERT INTO t2 VALUES (3); +ERROR HY000: Rows inserted during bulk load must not overlap existing rows +SET rocksdb_bulk_load=0; +DROP TABLE t1; +DROP TABLE t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result index 947f67434a5..4fd7ae9d9a5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data will be ordered in ascending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 
'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result index 6c38e030afb..7d7c9f34200 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data will be ordered in descending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "rev:cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result index e566691af28..c1b6d48a6a5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result @@ -1,4 +1,3 @@ -DROP TABLE IF EXISTS t1, t2, t3; Data 
will be ordered in descending order CREATE TABLE t1( pk CHAR(5), @@ -6,21 +5,21 @@ a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t2( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin'; +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; CREATE TABLE t3( pk CHAR(5), a CHAR(30), b CHAR(30), PRIMARY KEY(pk) COMMENT "cf1", KEY(a) -) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result index 2a7c7bd69fd..2adaba1e228 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result @@ -1,12 +1,12 @@ -DROP TABLE IF EXISTS t1; SET rocksdb_bulk_load_size=3; SET rocksdb_bulk_load_allow_unsorted=1; -CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1"); +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b SET rocksdb_bulk_load=0; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b -3 5 -1 3 @@ -14,42 +14,49 @@ a b 4 -2 6 -4 DROP TABLE t1; -CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b)); +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b)) +ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b -6 -4 -4 -2 -2 0 --1 3 --3 5 SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +-3 5 +-1 3 +2 0 +4 -2 +6 -4 DROP TABLE t1; -CREATE TABLE t1(a INT, b INT, PRIMARY 
KEY(a) COMMENT "cf1"); -CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1"); +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; SET rocksdb_bulk_load=1; INSERT INTO t1 VALUES (1,1); INSERT INTO t2 VALUES (1,1); -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b 1 1 INSERT INTO t1 VALUES (2,2); -SELECT * FROM t2; +SELECT * FROM t2 FORCE INDEX (PRIMARY); a b 1 1 -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b 1 1 SET rocksdb_bulk_load=0; -SELECT * FROM t1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); a b 1 1 2 2 DROP TABLE t1, t2; -CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1"); -CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1"); +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1") +ENGINE=ROCKSDB; CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1") -PARTITION BY KEY() PARTITIONS 4; +ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; connect other,localhost,root,,; set session transaction isolation level repeatable read; select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; @@ -99,5 +106,15 @@ count(a) select count(b) from t3; count(b) 5000000 +SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3; +a b +-4999998 5000000 +-4999996 4999998 +-4999994 4999996 +SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3; +a b +4999999 -4999997 +4999997 -4999995 +4999995 -4999993 +disconnect other; DROP TABLE t1, t2, t3; -SET rocksdb_bulk_load_allow_unsorted=0; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result new file mode 100644 index 00000000000..f828fa57255 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result @@ -0,0 +1,120 @@ +SET rocksdb_bulk_load_size=3; +SET 
rocksdb_bulk_load_allow_unsorted=1; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +6 -4 +4 -2 +2 0 +-1 3 +-3 5 +DROP TABLE t1; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1", KEY(b)) +ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +6 -4 +4 -2 +2 0 +-1 3 +-3 5 +DROP TABLE t1; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1,1); +INSERT INTO t2 VALUES (1,1); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +1 1 +INSERT INTO t1 VALUES (2,2); +SELECT * FROM t2 FORCE INDEX (PRIMARY); +a b +1 1 +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +1 1 +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +a b +2 2 +1 1 +DROP TABLE t1, t2; +CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "rev:cf1") +ENGINE=ROCKSDB; +CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1") +ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; +connect other,localhost,root,,; +set session transaction isolation level repeatable read; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +STAT_TYPE VALUE +DB_NUM_SNAPSHOTS 0 +start transaction with consistent snapshot; +select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; +STAT_TYPE VALUE +DB_NUM_SNAPSHOTS 1 +connection default; +set rocksdb_bulk_load=1; +set rocksdb_bulk_load_size=100000; +LOAD DATA INFILE <input_file> INTO TABLE t1; +LOAD DATA INFILE <input_file> INTO TABLE t2; +LOAD DATA INFILE <input_file> INTO TABLE 
t3; +set rocksdb_bulk_load=0; +SHOW TABLE STATUS WHERE name LIKE 't%'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned +ANALYZE TABLE t1, t2, t3; +Table Op Msg_type Msg_text +test.t1 analyze status OK +test.t2 analyze status OK +test.t3 analyze status OK +SHOW TABLE STATUS WHERE name LIKE 't%'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL +t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned +select count(a) from t1; +count(a) +5000000 +select count(b) from t1; +count(b) +5000000 +select count(a) from t2; +count(a) +5000000 +select count(b) from t2; +count(b) +5000000 +select count(a) from t3; +count(a) +5000000 +select count(b) from t3; +count(b) +5000000 +SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3; +a b +4999999 -4999997 +4999997 -4999995 +4999995 -4999993 +SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3; +a b +-4999998 5000000 +-4999996 4999998 +-4999994 4999996 +disconnect other; +DROP TABLE t1, t2, t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result index 3bd87e9ffd6..4b201d523d9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/cardinality.result @@ -1,3 +1,38 @@ +CREATE 
TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb; +insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4), +(5, 4),(6, 4),(7, 4),(8, 4),(9, 4); +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="id"; +cardinality +NULL +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="a"; +cardinality +NULL +ANALYZE TABLE t0; +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +FLOOR(@N/cardinality) +1 +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; +FLOOR(@N/cardinality) +2 +SET GLOBAL rocksdb_force_flush_memtable_now = 1; +ANALYZE TABLE t0; +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +FLOOR(@N/cardinality) +1 +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; +FLOOR(@N/cardinality) +2 +drop table t0; DROP TABLE IF EXISTS t1,t10,t11; create table t1( id bigint not null primary key, diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result new file mode 100644 index 00000000000..6ff49908a51 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result @@ -0,0 +1,7 @@ +select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; +variable_name variable_value +ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON +FOUND 1 /RocksDB: Compatibility check against existing database options failed/ in my_restart.err +select variable_name, variable_value from information_schema.global_variables 
where variable_name="rocksdb_ignore_unknown_options"; +variable_name variable_value +ROCKSDB_IGNORE_UNKNOWN_OPTIONS ON diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result index d7cb89becb7..1e7509172cb 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result @@ -66,13 +66,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- ----------------------------------------- @@ -122,13 +115,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -147,13 +133,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- ----------------------------------------- @@ -204,13 +183,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -229,13 +201,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -254,13 +219,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID 
-COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- ----------------------------------------- @@ -295,13 +253,6 @@ KEY LOCK TYPE: EXCLUSIVE INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- ----------------------------------------- @@ -324,8 +275,12 @@ i 3 select * from t where i=2 for update; select * from t where i=3 for update; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; select * from t where i=1 for update; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; +deadlocks +true rollback; i 3 @@ -410,13 +365,6 @@ KEY LOCK TYPE: SHARED INDEX NAME: PRIMARY TABLE NAME: test.t ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: PRIMARY -TABLE NAME: test.t --------TXN_ID GOT DEADLOCK--------- @@ -455,13 +403,6 @@ KEY LOCK TYPE: SHARED INDEX NAME: NOT FOUND; IDX_ID TABLE NAME: NOT FOUND; IDX_ID ----------------WAITING FOR--------------- -TXN_ID -COLUMN FAMILY NAME: default -KEY -LOCK TYPE: EXCLUSIVE -INDEX NAME: NOT FOUND; IDX_ID -TABLE NAME: NOT FOUND; IDX_ID --------TXN_ID GOT DEADLOCK--------- diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result index a39f2d8c0d6..6bca2cbad2d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result @@ -1,17 +1,22 @@ DROP TABLE IF EXISTS is_ddl_t1; DROP TABLE IF EXISTS is_ddl_t2; +DROP TABLE IF EXISTS is_ddl_t3; CREATE 
TABLE is_ddl_t1 (i INT, j INT, k INT, l INT, PRIMARY KEY (i), KEY (j), KEY (k, l) COMMENT 'kl_cf') ENGINE = ROCKSDB; CREATE TABLE is_ddl_t2 (x INT, y INT, z INT, PRIMARY KEY (z, y) COMMENT 'zy_cf', KEY (x)) ENGINE = ROCKSDB; -SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; -TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF -test is_ddl_t1 NULL PRIMARY 1 13 default -test is_ddl_t1 NULL j 2 13 default -test is_ddl_t1 NULL k 2 13 kl_cf -test is_ddl_t2 NULL PRIMARY 1 13 zy_cf -test is_ddl_t2 NULL x 2 13 default +CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB +COMMENT "ttl_duration=3600;"; +SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; +TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF TTL_DURATION INDEX_FLAGS +test is_ddl_t1 NULL PRIMARY 1 13 default 0 0 +test is_ddl_t1 NULL j 2 13 default 0 0 +test is_ddl_t1 NULL k 2 13 kl_cf 0 0 +test is_ddl_t2 NULL PRIMARY 1 13 zy_cf 0 0 +test is_ddl_t2 NULL x 2 13 default 0 0 +test is_ddl_t3 NULL PRIMARY 1 13 default 3600 1 DROP TABLE is_ddl_t1; DROP TABLE is_ddl_t2; +DROP TABLE is_ddl_t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result new file mode 100644 index 00000000000..36db92095e9 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result @@ -0,0 +1,215 @@ +set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout; +set @prior_deadlock_detect = @@rocksdb_deadlock_detect; +set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks; +set global rocksdb_deadlock_detect = on; +set global rocksdb_lock_wait_timeout = 10000; +# Clears deadlock buffer of any prior deadlocks. 
+set global rocksdb_max_latest_deadlocks = 0; +set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connect con3,localhost,root,,; +connection default; +show create table information_schema.rocksdb_deadlock; +Table Create Table +ROCKSDB_DEADLOCK CREATE TEMPORARY TABLE `ROCKSDB_DEADLOCK` ( + `DEADLOCK_ID` bigint(8) NOT NULL DEFAULT 0, + `TRANSACTION_ID` bigint(8) NOT NULL DEFAULT 0, + `CF_NAME` varchar(193) NOT NULL DEFAULT '', + `WAITING_KEY` varchar(513) NOT NULL DEFAULT '', + `LOCK_TYPE` varchar(193) NOT NULL DEFAULT '', + `INDEX_NAME` varchar(193) NOT NULL DEFAULT '', + `TABLE_NAME` varchar(193) NOT NULL DEFAULT '', + `ROLLED_BACK` bigint(8) NOT NULL DEFAULT 0 +) ENGINE=MEMORY DEFAULT CHARSET=utf8 +create table t (i int primary key) engine=rocksdb; +insert into t values (1), (2), (3); +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +Deadlock #1 +connection con1; +begin; +select * from t where i=1 for update; +i +1 +connection con2; +begin; +select * from t where i=2 for update; +i +2 +connection con1; +select * from t where i=2 for update; +connection con2; +select * from t where i=1 for update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +connection con1; +i +2 +rollback; +connection default; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +Deadlock #2 +connection con1; +begin; +select * from t where i=1 for update; +i +1 +connection con2; +begin; +select * from t where i=2 for update; +i +2 +connection con1; +select * from t where i=2 for update; +connection con2; +select * from t where i=1 for 
update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +connection con1; +i +2 +rollback; +connection default; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +set global rocksdb_max_latest_deadlocks = 10; +Deadlock #3 +connection con1; +begin; +select * from t where i=1 for update; +i +1 +connection con2; +begin; +select * from t where i=2 for update; +i +2 +connection con1; +select * from t where i=2 for update; +connection con2; +select * from t where i=1 for update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +connection con1; +i +2 +rollback; +connection default; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 +set global rocksdb_max_latest_deadlocks = 1; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 1 
+connection con3; +set rocksdb_deadlock_detect_depth = 2; +Deadlock #4 +connection con1; +begin; +select * from t where i=1 for update; +i +1 +connection con2; +begin; +select * from t where i=2 for update; +i +2 +connection con3; +begin; +select * from t where i=3 for update; +i +3 +connection con1; +select * from t where i=2 for update; +connection con2; +select * from t where i=3 for update; +connection con3; +select * from t where i=1 for update; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +connection con2; +i +3 +rollback; +connection con1; +i +2 +rollback; +connection default; +set global rocksdb_max_latest_deadlocks = 5; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +Deadlock #5 +connection con1; +begin; +select * from t where i=1 for update; +i +1 +connection con2; +begin; +select * from t where i=2 for update; +i +2 +connection con3; +begin; +select * from t where i=3 lock in share mode; +i +3 +connection con1; +select * from t where i=100 for update; +i +select * from t where i=101 for update; +i +select * from t where i=2 for update; +connection con2; +select * from t where i=3 lock in share mode; +i +3 +select * from t where i=200 for update; +i +select * from t where i=201 for update; +i +select * from t where i=1 lock in share mode; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +rollback; +connection con1; +i +2 +rollback; +connection con3; +rollback; +connection default; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE PRIMARY test.t 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED PRIMARY test.t 1 +disconnect con1; +disconnect con2; +disconnect con3; +set global rocksdb_lock_wait_timeout = 
@prior_lock_wait_timeout; +set global rocksdb_deadlock_detect = @prior_deadlock_detect; +drop table t; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY EXCLUSIVE INDEX_NAME TABLE_NAME 0 +DEADLOCK_ID TRANSACTION_ID default WAITING_KEY SHARED INDEX_NAME TABLE_NAME 1 +set global rocksdb_max_latest_deadlocks = 0; +# Clears deadlock buffer of any existent deadlocks. +set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; +select * from information_schema.rocksdb_deadlock; +DEADLOCK_ID TRANSACTION_ID CF_NAME WAITING_KEY LOCK_TYPE INDEX_NAME TABLE_NAME ROLLED_BACK diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result index f63a271cdce..7fb9055083b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result @@ -25,10 +25,10 @@ UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100; DROP TABLE t0, t1; create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb; insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300); +set global rocksdb_force_flush_memtable_now=1; analyze table t1; Table Op Msg_type Msg_text test.t1 analyze status OK -set global rocksdb_force_flush_memtable_now=1; explain select * from t1 where key1 = 1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ref key1 key1 5 const # diff --git a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result index 6850d8dff16..aba14e3c076 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result @@ -4,15 +4,14 @@ DROP TABLE IF EXISTS t3; SET GLOBAL 
ROCKSDB_PAUSE_BACKGROUND_WORK=1; create table t1 (a int) engine=rocksdb; drop table t1; -select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; TYPE NAME VALUE MAX_INDEX_ID MAX_INDEX_ID max_index_id CF_FLAGS 0 default [0] CF_FLAGS 1 __system__ [0] -DDL_DROP_INDEX_ONGOING cf_id:0,index_id:max_index_id -select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; count(*) -4 +3 SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0; select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; CREATE TABLE t1 (i1 INT, i2 INT, PRIMARY KEY (i1)) ENGINE = ROCKSDB; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result index 62875e378a4..797f339d8b1 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result @@ -6,6 +6,19 @@ t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL INSERT INTO t1 VALUES ('538647864786478647864'); Warnings: Warning 1264 Out of range value for column 'pk' at row 1 +SELECT * FROM t1; +pk +5 +9223372036854775807 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL +INSERT INTO t1 VALUES (); +ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY' +SELECT * FROM t1; +pk +5 +9223372036854775807 SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time 
Collation Checksum Create_options Comment t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL @@ -19,3 +32,37 @@ SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL DROP TABLE t1; +CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT); +INSERT INTO t1 VALUES (5); +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL +INSERT INTO t1 VALUES (1000); +Warnings: +Warning 1264 Out of range value for column 'pk' at row 1 +SELECT * FROM t1; +pk +5 +127 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB 10 Fixed 2 15 30 0 0 0 127 NULL NULL NULL latin1_swedish_ci NULL +INSERT INTO t1 VALUES (); +ERROR 23000: Duplicate entry '127' for key 'PRIMARY' +SELECT * FROM t1; +pk +5 +127 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL +INSERT INTO t1 VALUES (); +ERROR 23000: Duplicate entry '127' for key 'PRIMARY' +SELECT * FROM t1; +pk +5 +127 +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time 
Check_time Collation Checksum Create_options Comment +t1 ROCKSDB # Fixed 2 # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result index d0bfb05fd1b..96efca6e2b7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result @@ -8,6 +8,7 @@ ROW_LOCK_WAIT_TIMEOUTS begin; set @@rocksdb_lock_wait_timeout=1; begin; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; insert into t values(0); ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; @@ -16,6 +17,10 @@ ROW_LOCK_WAIT_TIMEOUTS select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; ROW_LOCK_WAIT_TIMEOUTS 1 +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; +waits +true +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; insert into t values(0); ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; @@ -24,4 +29,7 @@ ROW_LOCK_WAIT_TIMEOUTS select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; ROW_LOCK_WAIT_TIMEOUTS 2 +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; +waits +true drop table t; diff --git 
a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result index 9674b2b0c15..98c5ebe9f4c 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result @@ -81,6 +81,7 @@ ROCKSDB_DDL Gamma ROCKSDB_INDEX_FILE_MAP Gamma ROCKSDB_LOCKS Gamma ROCKSDB_TRX Gamma +ROCKSDB_DEADLOCK Gamma # # MDEV-12466 : Assertion `thd->transaction.stmt.is_empty() || thd->in_sub_stmt || ... # diff --git a/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result new file mode 100644 index 00000000000..5d34f4e9640 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result @@ -0,0 +1,21 @@ +CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*"); +FOUND 1 /RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit/ in rocksdb.max_open_files.err +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; +FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files +1 +SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files; +@@global.open_files_limit - 1 = @@global.rocksdb_max_open_files +1 +SELECT @@global.rocksdb_max_open_files; +@@global.rocksdb_max_open_files +0 +CREATE TABLE t1(a INT) ENGINE=ROCKSDB; +INSERT INTO t1 VALUES(0),(1),(2),(3),(4); +SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1; +DROP TABLE t1; +SELECT @@global.rocksdb_max_open_files; +@@global.rocksdb_max_open_files +-1 +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; +FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files +1 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result index 27b1779627b..1fe61fe9fc5 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result @@ -36,7 +36,7 @@ explain select b, d from t where d > 4; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan rows_read -1509 +1505 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -44,7 +44,7 @@ explain select a, b, c, d from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra @@ -58,13 +58,13 @@ explain select e from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select e from t where a = 5 and d <= 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where rows_read -251 +250 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -72,13 +72,13 @@ explain select a, b, c, d from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range 
PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -51 +26 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -86,13 +86,13 @@ explain select e from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select e from t where a = 5 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY,b PRIMARY 4 const # Using where rows_read -251 +250 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -100,13 +100,13 @@ explain select a, b, c, d from t where a in (1, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index rows_read -502 +500 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -102 +52 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -114,13 +114,13 @@ explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY PRIMARY 4 NULL # Using where; Using index rows_read -753 +750 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 3, 5) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range 
PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -153 +78 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -128,13 +128,13 @@ explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index rows_read -204 +200 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 5) and b in (1, 2) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -44 +24 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -142,13 +142,13 @@ explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) a id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 8 NULL # Using where; Using index rows_read -765 +750 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a in (1, 2, 3, 4, 5) and b in (1, 2, 3) and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -165 +90 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -156,13 +156,13 @@ explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY,b PRIMARY 8 const,const # Using where; Using index rows_read -51 +50 set optimizer_switch = 
'skip_scan=on,skip_scan_cost_based=off'; explain select a, b, c, d from t where a = 5 and b = 2 and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b PRIMARY 16 NULL # Using where; Using index for skip scan rows_read -11 +6 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=off'; @@ -170,7 +170,7 @@ explain select a+1, b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select a+1, b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra @@ -184,7 +184,7 @@ explain select b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t ref PRIMARY PRIMARY 4 const # Using where; Using index rows_read -251 +250 set optimizer_switch = 'skip_scan=on,skip_scan_cost_based=off'; explain select b, c, d from t where a = 5 and d < 3; id select_type table type possible_keys key key_len ref rows Extra @@ -204,7 +204,7 @@ explain select a, b, c, d from t where a = b and d >= 98; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t range PRIMARY,b b 8 NULL # Using where; Using index for skip scan rows_read -9 +5 include/diff_tables.inc [temp_orig, temp_skip] set optimizer_switch = 'skip_scan=off,skip_scan_cost_based=on'; set optimizer_switch = 'skip_scan=on'; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result index 6586b92d129..28f965843aa 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/perf_context.result @@ -14,8 +14,13 @@ test t1 NULL BLOCK_READ_BYTE # test t1 
NULL BLOCK_READ_TIME # test t1 NULL BLOCK_CHECKSUM_TIME # test t1 NULL BLOCK_DECOMPRESS_TIME # +test t1 NULL GET_READ_BYTES # +test t1 NULL MULTIGET_READ_BYTES # +test t1 NULL ITER_READ_BYTES # test t1 NULL INTERNAL_KEY_SKIPPED_COUNT # test t1 NULL INTERNAL_DELETE_SKIPPED_COUNT # +test t1 NULL INTERNAL_RECENT_SKIPPED_COUNT # +test t1 NULL INTERNAL_MERGE_COUNT # test t1 NULL GET_SNAPSHOT_TIME # test t1 NULL GET_FROM_MEMTABLE_TIME # test t1 NULL GET_FROM_MEMTABLE_COUNT # @@ -23,9 +28,12 @@ test t1 NULL GET_POST_PROCESS_TIME # test t1 NULL GET_FROM_OUTPUT_FILES_TIME # test t1 NULL SEEK_ON_MEMTABLE_TIME # test t1 NULL SEEK_ON_MEMTABLE_COUNT # +test t1 NULL NEXT_ON_MEMTABLE_COUNT # +test t1 NULL PREV_ON_MEMTABLE_COUNT # test t1 NULL SEEK_CHILD_SEEK_TIME # test t1 NULL SEEK_CHILD_SEEK_COUNT # -test t1 NULL SEEK_IN_HEAP_TIME # +test t1 NULL SEEK_MIN_HEAP_TIME # +test t1 NULL SEEK_MAX_HEAP_TIME # test t1 NULL SEEK_INTERNAL_SEEK_TIME # test t1 NULL FIND_NEXT_USER_ENTRY_TIME # test t1 NULL WRITE_WAL_TIME # @@ -41,6 +49,12 @@ test t1 NULL NEW_TABLE_BLOCK_ITER_NANOS # test t1 NULL NEW_TABLE_ITERATOR_NANOS # test t1 NULL BLOCK_SEEK_NANOS # test t1 NULL FIND_TABLE_NANOS # +test t1 NULL BLOOM_MEMTABLE_HIT_COUNT # +test t1 NULL BLOOM_MEMTABLE_MISS_COUNT # +test t1 NULL BLOOM_SST_HIT_COUNT # +test t1 NULL BLOOM_SST_MISS_COUNT # +test t1 NULL KEY_LOCK_WAIT_TIME # +test t1 NULL KEY_LOCK_WAIT_COUNT # test t1 NULL IO_THREAD_POOL_ID # test t1 NULL IO_BYTES_WRITTEN # test t1 NULL IO_BYTES_READ # @@ -59,8 +73,13 @@ BLOCK_READ_BYTE # BLOCK_READ_TIME # BLOCK_CHECKSUM_TIME # BLOCK_DECOMPRESS_TIME # +GET_READ_BYTES # +MULTIGET_READ_BYTES # +ITER_READ_BYTES # INTERNAL_KEY_SKIPPED_COUNT # INTERNAL_DELETE_SKIPPED_COUNT # +INTERNAL_RECENT_SKIPPED_COUNT # +INTERNAL_MERGE_COUNT # GET_SNAPSHOT_TIME # GET_FROM_MEMTABLE_TIME # GET_FROM_MEMTABLE_COUNT # @@ -68,9 +87,12 @@ GET_POST_PROCESS_TIME # GET_FROM_OUTPUT_FILES_TIME # SEEK_ON_MEMTABLE_TIME # SEEK_ON_MEMTABLE_COUNT # +NEXT_ON_MEMTABLE_COUNT # 
+PREV_ON_MEMTABLE_COUNT # SEEK_CHILD_SEEK_TIME # SEEK_CHILD_SEEK_COUNT # -SEEK_IN_HEAP_TIME # +SEEK_MIN_HEAP_TIME # +SEEK_MAX_HEAP_TIME # SEEK_INTERNAL_SEEK_TIME # FIND_NEXT_USER_ENTRY_TIME # WRITE_WAL_TIME # @@ -86,6 +108,12 @@ NEW_TABLE_BLOCK_ITER_NANOS # NEW_TABLE_ITERATOR_NANOS # BLOCK_SEEK_NANOS # FIND_TABLE_NANOS # +BLOOM_MEMTABLE_HIT_COUNT # +BLOOM_MEMTABLE_MISS_COUNT # +BLOOM_SST_HIT_COUNT # +BLOOM_SST_MISS_COUNT # +KEY_LOCK_WAIT_TIME # +KEY_LOCK_WAIT_COUNT # IO_THREAD_POOL_ID # IO_BYTES_WRITTEN # IO_BYTES_READ # diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result index 0b5e512cdc9..6138dac92e5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result @@ -573,9 +573,6 @@ insert into t30 values ('row3', 'row3-key', 'row3-data'), ('row4', 'row4-key', 'row4-data'), ('row5', 'row5-key', 'row5-data'); -analyze table t30; -Table Op Msg_type Msg_text -test.t30 analyze status OK explain select * from t30 where key1 <='row3-key'; id select_type table type possible_keys key key_len ref rows Extra @@ -868,6 +865,7 @@ ERROR 42S02: Unknown table 'test.t45' show variables where variable_name like 'rocksdb%' and +variable_name not like 'rocksdb_max_open_files' and variable_name not like 'rocksdb_supported_compression_types'; Variable_name Value rocksdb_access_hint_on_compaction_start 1 @@ -875,6 +873,7 @@ rocksdb_advise_random_on_open ON rocksdb_allow_concurrent_memtable_write OFF rocksdb_allow_mmap_reads OFF rocksdb_allow_mmap_writes OFF +rocksdb_allow_to_start_after_corruption OFF rocksdb_blind_delete_primary_key OFF rocksdb_block_cache_size 536870912 rocksdb_block_restart_interval 16 @@ -894,7 +893,6 @@ rocksdb_compaction_sequential_deletes 0 rocksdb_compaction_sequential_deletes_count_sd OFF rocksdb_compaction_sequential_deletes_file_size 0 rocksdb_compaction_sequential_deletes_window 0 -rocksdb_concurrent_prepare ON 
rocksdb_create_checkpoint rocksdb_create_if_missing ON rocksdb_create_missing_column_families OFF @@ -918,7 +916,6 @@ rocksdb_enable_ttl_read_filtering ON rocksdb_enable_write_thread_adaptive_yield OFF rocksdb_error_if_exists OFF rocksdb_flush_log_at_trx_commit 0 -rocksdb_flush_memtable_on_analyze ON rocksdb_force_compute_memtable_stats ON rocksdb_force_compute_memtable_stats_cachetime 0 rocksdb_force_flush_memtable_and_lzero_now OFF @@ -926,6 +923,7 @@ rocksdb_force_flush_memtable_now OFF rocksdb_force_index_records_in_range 0 rocksdb_git_hash # rocksdb_hash_index_allow_collision ON +rocksdb_ignore_unknown_options ON rocksdb_index_type kBinarySearch rocksdb_info_log_level error_level rocksdb_io_write_timeout 0 @@ -942,8 +940,7 @@ rocksdb_max_background_jobs 2 rocksdb_max_latest_deadlocks 5 rocksdb_max_log_file_size 0 rocksdb_max_manifest_file_size 18446744073709551615 -rocksdb_max_open_files -1 -rocksdb_max_row_locks 1073741824 +rocksdb_max_row_locks 1048576 rocksdb_max_subcompactions 1 rocksdb_max_total_wal_size 0 rocksdb_merge_buf_size 67108864 @@ -978,6 +975,7 @@ rocksdb_table_cache_numshardbits 6 rocksdb_table_stats_sampling_pct 10 rocksdb_tmpdir rocksdb_trace_sst_api OFF +rocksdb_two_write_queues ON rocksdb_unsafe_for_binlog OFF rocksdb_update_cf_options rocksdb_use_adaptive_mutex OFF @@ -1464,6 +1462,7 @@ Rocksdb_rows_read # Rocksdb_rows_updated # Rocksdb_rows_deleted_blind # Rocksdb_rows_expired # +Rocksdb_rows_filtered # Rocksdb_system_rows_deleted # Rocksdb_system_rows_inserted # Rocksdb_system_rows_read # @@ -1474,11 +1473,22 @@ Rocksdb_queries_point # Rocksdb_queries_range # Rocksdb_covered_secondary_key_lookups # Rocksdb_block_cache_add # +Rocksdb_block_cache_add_failures # +Rocksdb_block_cache_bytes_read # +Rocksdb_block_cache_bytes_write # +Rocksdb_block_cache_data_add # +Rocksdb_block_cache_data_bytes_insert # Rocksdb_block_cache_data_hit # Rocksdb_block_cache_data_miss # +Rocksdb_block_cache_filter_add # +Rocksdb_block_cache_filter_bytes_evict # 
+Rocksdb_block_cache_filter_bytes_insert # Rocksdb_block_cache_filter_hit # Rocksdb_block_cache_filter_miss # Rocksdb_block_cache_hit # +Rocksdb_block_cache_index_add # +Rocksdb_block_cache_index_bytes_evict # +Rocksdb_block_cache_index_bytes_insert # Rocksdb_block_cache_index_hit # Rocksdb_block_cache_index_miss # Rocksdb_block_cache_miss # @@ -1495,7 +1505,11 @@ Rocksdb_compaction_key_drop_new # Rocksdb_compaction_key_drop_obsolete # Rocksdb_compaction_key_drop_user # Rocksdb_flush_write_bytes # +Rocksdb_get_hit_l0 # +Rocksdb_get_hit_l1 # +Rocksdb_get_hit_l2_and_up # Rocksdb_getupdatessince_calls # +Rocksdb_iter_bytes_read # Rocksdb_memtable_hit # Rocksdb_memtable_miss # Rocksdb_no_file_closes # @@ -1503,6 +1517,12 @@ Rocksdb_no_file_errors # Rocksdb_no_file_opens # Rocksdb_num_iterators # Rocksdb_number_block_not_compressed # +Rocksdb_number_db_next # +Rocksdb_number_db_next_found # +Rocksdb_number_db_prev # +Rocksdb_number_db_prev_found # +Rocksdb_number_db_seek # +Rocksdb_number_db_seek_found # Rocksdb_number_deletes_filtered # Rocksdb_number_keys_read # Rocksdb_number_keys_updated # @@ -1517,11 +1537,11 @@ Rocksdb_number_sst_entry_merge # Rocksdb_number_sst_entry_other # Rocksdb_number_sst_entry_put # Rocksdb_number_sst_entry_singledelete # -Rocksdb_number_stat_computes # Rocksdb_number_superversion_acquires # Rocksdb_number_superversion_cleanups # Rocksdb_number_superversion_releases # -Rocksdb_rate_limit_delay_millis # +Rocksdb_row_lock_deadlocks # +Rocksdb_row_lock_wait_timeouts # Rocksdb_snapshot_conflict_errors # Rocksdb_stall_l0_file_count_limit_slowdowns # Rocksdb_stall_locked_l0_file_count_limit_slowdowns # @@ -1549,6 +1569,7 @@ ROCKSDB_ROWS_READ ROCKSDB_ROWS_UPDATED ROCKSDB_ROWS_DELETED_BLIND ROCKSDB_ROWS_EXPIRED +ROCKSDB_ROWS_FILTERED ROCKSDB_SYSTEM_ROWS_DELETED ROCKSDB_SYSTEM_ROWS_INSERTED ROCKSDB_SYSTEM_ROWS_READ @@ -1559,11 +1580,22 @@ ROCKSDB_QUERIES_POINT ROCKSDB_QUERIES_RANGE ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS ROCKSDB_BLOCK_CACHE_ADD 
+ROCKSDB_BLOCK_CACHE_ADD_FAILURES +ROCKSDB_BLOCK_CACHE_BYTES_READ +ROCKSDB_BLOCK_CACHE_BYTES_WRITE +ROCKSDB_BLOCK_CACHE_DATA_ADD +ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT ROCKSDB_BLOCK_CACHE_DATA_HIT ROCKSDB_BLOCK_CACHE_DATA_MISS +ROCKSDB_BLOCK_CACHE_FILTER_ADD +ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT +ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT ROCKSDB_BLOCK_CACHE_FILTER_HIT ROCKSDB_BLOCK_CACHE_FILTER_MISS ROCKSDB_BLOCK_CACHE_HIT +ROCKSDB_BLOCK_CACHE_INDEX_ADD +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT ROCKSDB_BLOCK_CACHE_INDEX_HIT ROCKSDB_BLOCK_CACHE_INDEX_MISS ROCKSDB_BLOCK_CACHE_MISS @@ -1580,7 +1612,11 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE ROCKSDB_COMPACTION_KEY_DROP_USER ROCKSDB_FLUSH_WRITE_BYTES +ROCKSDB_GET_HIT_L0 +ROCKSDB_GET_HIT_L1 +ROCKSDB_GET_HIT_L2_AND_UP ROCKSDB_GETUPDATESSINCE_CALLS +ROCKSDB_ITER_BYTES_READ ROCKSDB_MEMTABLE_HIT ROCKSDB_MEMTABLE_MISS ROCKSDB_NO_FILE_CLOSES @@ -1588,6 +1624,12 @@ ROCKSDB_NO_FILE_ERRORS ROCKSDB_NO_FILE_OPENS ROCKSDB_NUM_ITERATORS ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED +ROCKSDB_NUMBER_DB_NEXT +ROCKSDB_NUMBER_DB_NEXT_FOUND +ROCKSDB_NUMBER_DB_PREV +ROCKSDB_NUMBER_DB_PREV_FOUND +ROCKSDB_NUMBER_DB_SEEK +ROCKSDB_NUMBER_DB_SEEK_FOUND ROCKSDB_NUMBER_DELETES_FILTERED ROCKSDB_NUMBER_KEYS_READ ROCKSDB_NUMBER_KEYS_UPDATED @@ -1602,11 +1644,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE ROCKSDB_NUMBER_SST_ENTRY_OTHER ROCKSDB_NUMBER_SST_ENTRY_PUT ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE -ROCKSDB_NUMBER_STAT_COMPUTES ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS ROCKSDB_NUMBER_SUPERVERSION_RELEASES -ROCKSDB_RATE_LIMIT_DELAY_MILLIS +ROCKSDB_ROW_LOCK_DEADLOCKS +ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS ROCKSDB_SNAPSHOT_CONFLICT_ERRORS ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS @@ -1636,6 +1678,7 @@ ROCKSDB_ROWS_READ ROCKSDB_ROWS_UPDATED ROCKSDB_ROWS_DELETED_BLIND ROCKSDB_ROWS_EXPIRED +ROCKSDB_ROWS_FILTERED 
ROCKSDB_SYSTEM_ROWS_DELETED ROCKSDB_SYSTEM_ROWS_INSERTED ROCKSDB_SYSTEM_ROWS_READ @@ -1646,11 +1689,22 @@ ROCKSDB_QUERIES_POINT ROCKSDB_QUERIES_RANGE ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS ROCKSDB_BLOCK_CACHE_ADD +ROCKSDB_BLOCK_CACHE_ADD_FAILURES +ROCKSDB_BLOCK_CACHE_BYTES_READ +ROCKSDB_BLOCK_CACHE_BYTES_WRITE +ROCKSDB_BLOCK_CACHE_DATA_ADD +ROCKSDB_BLOCK_CACHE_DATA_BYTES_INSERT ROCKSDB_BLOCK_CACHE_DATA_HIT ROCKSDB_BLOCK_CACHE_DATA_MISS +ROCKSDB_BLOCK_CACHE_FILTER_ADD +ROCKSDB_BLOCK_CACHE_FILTER_BYTES_EVICT +ROCKSDB_BLOCK_CACHE_FILTER_BYTES_INSERT ROCKSDB_BLOCK_CACHE_FILTER_HIT ROCKSDB_BLOCK_CACHE_FILTER_MISS ROCKSDB_BLOCK_CACHE_HIT +ROCKSDB_BLOCK_CACHE_INDEX_ADD +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_EVICT +ROCKSDB_BLOCK_CACHE_INDEX_BYTES_INSERT ROCKSDB_BLOCK_CACHE_INDEX_HIT ROCKSDB_BLOCK_CACHE_INDEX_MISS ROCKSDB_BLOCK_CACHE_MISS @@ -1667,7 +1721,11 @@ ROCKSDB_COMPACTION_KEY_DROP_NEW ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE ROCKSDB_COMPACTION_KEY_DROP_USER ROCKSDB_FLUSH_WRITE_BYTES +ROCKSDB_GET_HIT_L0 +ROCKSDB_GET_HIT_L1 +ROCKSDB_GET_HIT_L2_AND_UP ROCKSDB_GETUPDATESSINCE_CALLS +ROCKSDB_ITER_BYTES_READ ROCKSDB_MEMTABLE_HIT ROCKSDB_MEMTABLE_MISS ROCKSDB_NO_FILE_CLOSES @@ -1675,6 +1733,12 @@ ROCKSDB_NO_FILE_ERRORS ROCKSDB_NO_FILE_OPENS ROCKSDB_NUM_ITERATORS ROCKSDB_NUMBER_BLOCK_NOT_COMPRESSED +ROCKSDB_NUMBER_DB_NEXT +ROCKSDB_NUMBER_DB_NEXT_FOUND +ROCKSDB_NUMBER_DB_PREV +ROCKSDB_NUMBER_DB_PREV_FOUND +ROCKSDB_NUMBER_DB_SEEK +ROCKSDB_NUMBER_DB_SEEK_FOUND ROCKSDB_NUMBER_DELETES_FILTERED ROCKSDB_NUMBER_KEYS_READ ROCKSDB_NUMBER_KEYS_UPDATED @@ -1689,11 +1753,11 @@ ROCKSDB_NUMBER_SST_ENTRY_MERGE ROCKSDB_NUMBER_SST_ENTRY_OTHER ROCKSDB_NUMBER_SST_ENTRY_PUT ROCKSDB_NUMBER_SST_ENTRY_SINGLEDELETE -ROCKSDB_NUMBER_STAT_COMPUTES ROCKSDB_NUMBER_SUPERVERSION_ACQUIRES ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS ROCKSDB_NUMBER_SUPERVERSION_RELEASES -ROCKSDB_RATE_LIMIT_DELAY_MILLIS +ROCKSDB_ROW_LOCK_DEADLOCKS +ROCKSDB_ROW_LOCK_WAIT_TIMEOUTS ROCKSDB_SNAPSHOT_CONFLICT_ERRORS 
ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result new file mode 100644 index 00000000000..a245fa851de --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result @@ -0,0 +1,11 @@ +# +# Issue #728: Assertion `covers_key(b)' failed in int +# myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&, +# const rocksdb::Slice&) +# +CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT); +INSERT INTO t2(c1)VALUES(0); +SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC; +c1 c2 c3 +0 NULL NULL +DROP TABLE t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result index d7a4f9dd065..10a6a02008e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result @@ -7,5 +7,5 @@ count(*) 10000 explain select c1 from t1 where c1 > 5 limit 10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range i i 9 NULL 9900 Using where; Using index +1 SIMPLE t1 range i i 9 NULL # Using where; Using index drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result index 1bcd3692b4a..9fc5db98d7d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result @@ -83,12 +83,12 @@ FROM INFORMATION_SCHEMA.ROCKSDB_PERF_CONTEXT WHERE TABLE_SCHEMA = 'test' GROUP BY TABLE_NAME, PARTITION_NAME; TABLE_SCHEMA TABLE_NAME PARTITION_NAME COUNT(STAT_TYPE) -test t1 NULL 43 -test t2 NULL 43 -test t4 p0 43 -test t4 p1 43 -test t4 p2 43 -test t4 p3 43 +test t1 NULL 57 +test t2 NULL 57 +test t4 p0 57 +test t4 p1 57 +test t4 p2 57 +test t4 p3 57 SELECT * FROM 
INFORMATION_SCHEMA.ROCKSDB_CF_OPTIONS; CF_NAME OPTION_TYPE VALUE __system__ COMPARATOR # @@ -153,9 +153,15 @@ __system__ TABLE_FACTORY::BLOCK_SIZE # __system__ TABLE_FACTORY::BLOCK_SIZE_DEVIATION # __system__ TABLE_FACTORY::BLOCK_RESTART_INTERVAL # __system__ TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +__system__ TABLE_FACTORY::METADATA_BLOCK_SIZE # +__system__ TABLE_FACTORY::PARTITION_FILTERS # +__system__ TABLE_FACTORY::USE_DELTA_ENCODING # __system__ TABLE_FACTORY::FILTER_POLICY # __system__ TABLE_FACTORY::WHOLE_KEY_FILTERING # +__system__ TABLE_FACTORY::VERIFY_COMPRESSION # +__system__ TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # __system__ TABLE_FACTORY::FORMAT_VERSION # +__system__ TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # cf_t1 COMPARATOR # cf_t1 MERGE_OPERATOR # cf_t1 COMPACTION_FILTER # @@ -218,9 +224,15 @@ cf_t1 TABLE_FACTORY::BLOCK_SIZE # cf_t1 TABLE_FACTORY::BLOCK_SIZE_DEVIATION # cf_t1 TABLE_FACTORY::BLOCK_RESTART_INTERVAL # cf_t1 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +cf_t1 TABLE_FACTORY::METADATA_BLOCK_SIZE # +cf_t1 TABLE_FACTORY::PARTITION_FILTERS # +cf_t1 TABLE_FACTORY::USE_DELTA_ENCODING # cf_t1 TABLE_FACTORY::FILTER_POLICY # cf_t1 TABLE_FACTORY::WHOLE_KEY_FILTERING # +cf_t1 TABLE_FACTORY::VERIFY_COMPRESSION # +cf_t1 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # cf_t1 TABLE_FACTORY::FORMAT_VERSION # +cf_t1 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # default COMPARATOR # default MERGE_OPERATOR # default COMPACTION_FILTER # @@ -283,9 +295,15 @@ default TABLE_FACTORY::BLOCK_SIZE # default TABLE_FACTORY::BLOCK_SIZE_DEVIATION # default TABLE_FACTORY::BLOCK_RESTART_INTERVAL # default TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +default TABLE_FACTORY::METADATA_BLOCK_SIZE # +default TABLE_FACTORY::PARTITION_FILTERS # +default TABLE_FACTORY::USE_DELTA_ENCODING # default TABLE_FACTORY::FILTER_POLICY # default TABLE_FACTORY::WHOLE_KEY_FILTERING # +default TABLE_FACTORY::VERIFY_COMPRESSION # +default TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # default 
TABLE_FACTORY::FORMAT_VERSION # +default TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # rev:cf_t2 COMPARATOR # rev:cf_t2 MERGE_OPERATOR # rev:cf_t2 COMPACTION_FILTER # @@ -348,9 +366,15 @@ rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE # rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE_DEVIATION # rev:cf_t2 TABLE_FACTORY::BLOCK_RESTART_INTERVAL # rev:cf_t2 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL # +rev:cf_t2 TABLE_FACTORY::METADATA_BLOCK_SIZE # +rev:cf_t2 TABLE_FACTORY::PARTITION_FILTERS # +rev:cf_t2 TABLE_FACTORY::USE_DELTA_ENCODING # rev:cf_t2 TABLE_FACTORY::FILTER_POLICY # rev:cf_t2 TABLE_FACTORY::WHOLE_KEY_FILTERING # +rev:cf_t2 TABLE_FACTORY::VERIFY_COMPRESSION # +rev:cf_t2 TABLE_FACTORY::READ_AMP_BYTES_PER_BIT # rev:cf_t2 TABLE_FACTORY::FORMAT_VERSION # +rev:cf_t2 TABLE_FACTORY::ENABLE_INDEX_COMPRESSION # DROP TABLE t1; DROP TABLE t2; DROP TABLE t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result index 7642dcda43f..92906f22b1e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result @@ -1,4 +1,20 @@ -CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB; +create table t1 (pk int primary key) engine=rocksdb; +show tables; +Tables_in_test +#mysql50#t1#sql-test +t1 +call mtr.add_suppression('Invalid .old.. 
table or database name .t1#sql-test.'); set session debug_dbug="+d,gen_sql_table_name"; rename table t1 to t2; set session debug_dbug= "-d,gen_sql_table_name"; +show tables; +Tables_in_test +#mysql50#t1#sql-test +t2 +show tables; +Tables_in_test +create table t2 (pk int primary key) engine=rocksdb; +show tables; +Tables_in_test +t2 +drop table t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result index 1df6e838bcd..c66b17926b7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result @@ -51,18 +51,30 @@ INSERT INTO t1 values (3); INSERT INTO t1 values (5); INSERT INTO t1 values (7); set global rocksdb_debug_ttl_rec_ts = 0; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +4 set global rocksdb_enable_ttl_read_filtering=0; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a 1 3 5 7 +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +0 set global rocksdb_enable_ttl_read_filtering=1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +4 DROP TABLE t1; CREATE TABLE t1 ( a int, @@ -191,20 +203,36 @@ a connection con2; set global rocksdb_force_flush_memtable_now=1; set global rocksdb_compact_cf='default'; +select variable_value into @a from information_schema.global_status where 
variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +1 # Switching to connection 1 connection con1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a 1 +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +0 UPDATE t1 set a = a + 1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a 2 +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +0 COMMIT; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; SELECT * FROM t1; a +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; +variable_value-@a +1 DROP TABLE t1; disconnect con1; disconnect con2; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result index c9fa716dffc..a7e086fde66 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result @@ -1,4 +1,3 @@ -drop table if exists t1,t2; # # A basic test whether endspace-aware variable length encoding # works when in PK @@ -756,3 +755,16 @@ email_i 1 drop table t; set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct; set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums; +drop table if exists t; +Warnings: +Note 1051 Unknown table 'test.t' +create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb; +insert into t(i,h) 
values('a','b'); +check table t; +Table Op Msg_type Msg_text +test.t check status OK +alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null; +check table t; +Table Op Msg_type Msg_text +test.t check status OK +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result index 3291826b290..e8456457cdd 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result @@ -4,23 +4,15 @@ call mtr.add_suppression("Aborting"); select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; plugin_name plugin_type ROCKSDB STORAGE ENGINE -# Check that ROCKSDB plugin is not loaded: -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -# Check that MyRocks has printed an error message into server error log: -FOUND 1 /enable both use_direct_reads/ in mysqld.1.err -# Now, restart the server back with regular settings -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -ROCKSDB STORAGE ENGINE -# -# Now, repeat the same with another set of invalid arguments -# -# Check that ROCKSDB plugin is not loaded: -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in mysqld.1.err -# Now, restart the server back with regular settings -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -plugin_name plugin_type -ROCKSDB STORAGE ENGINE +Checking direct reads +FOUND 1 /enable both use_direct_reads/ in use_direct_reads_writes.err +Checking direct writes +FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in use_direct_reads_writes.err 
+Checking rocksdb_flush_log_at_trx_commit +FOUND 1 /rocksdb_flush_log_at_trx_commit needs to be/ in use_direct_reads_writes.err +Validate flush_log settings when direct writes is enabled +set global rocksdb_flush_log_at_trx_commit=0; +set global rocksdb_flush_log_at_trx_commit=1; +ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '1' +set global rocksdb_flush_log_at_trx_commit=2; +ERROR 42000: Variable 'rocksdb_flush_log_at_trx_commit' can't be set to the value of '2' diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result index ee23446eec0..d0a9b034927 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result @@ -3,6 +3,7 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true; create table aaa (id int primary key, i int) engine rocksdb; set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit; SET GLOBAL rocksdb_flush_log_at_trx_commit=1; +insert aaa(id, i) values(0,1); select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; insert aaa(id, i) values(1,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; @@ -16,11 +17,11 @@ insert aaa(id, i) values(3,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; variable_value-@a 3 -SET GLOBAL rocksdb_flush_log_at_trx_commit=0; select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; +SET GLOBAL rocksdb_flush_log_at_trx_commit=0; insert aaa(id, i) values(4,1); -SET GLOBAL rocksdb_flush_log_at_trx_commit=2; select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; +SET GLOBAL rocksdb_flush_log_at_trx_commit=2; insert aaa(id, i) values(5,1); truncate 
table aaa; drop table aaa; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test index 78ddbe60da5..3977b38d725 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test @@ -65,7 +65,12 @@ ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; # disable duplicate index warning --disable_warnings # now do same index using copy algorithm +# hitting max row locks (1M) +--error ER_RDB_STATUS_GENERAL ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +set session rocksdb_bulk_load=1; +ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY; +set session rocksdb_bulk_load=0; --enable_warnings # checksum testing diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test index 1f3ef49e534..18ccf2e39f6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test @@ -135,3 +135,15 @@ set global rocksdb_force_flush_memtable_now = true; select * from t1; DROP TABLE t1; + +## https://github.com/facebook/mysql-5.6/issues/736 +create table t1 (i int auto_increment, key(i)) engine=rocksdb; +insert into t1 values(); +insert into t1 values(); +insert into t1 values(); + +show create table t1; +--source include/restart_mysqld.inc +show create table t1; + +drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test new file mode 100644 index 00000000000..67b2d5f96d7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test @@ -0,0 +1,75 @@ +--source include/have_rocksdb.inc +--source include/not_valgrind.inc + +--echo # +--echo # 
Test how MyRocks behaves when RocksDB reports corrupted data. +--echo # + +--source include/have_debug.inc + +# use custom error log to assert on error message in search_pattern_in_file.inc +--let LOG=$MYSQLTEST_VARDIR/tmp/allow_to_start_after_corruption_debug.err +--let SEARCH_FILE=$LOG + +# restart server to change error log and ignore corruptopn on startup +--let $_mysqld_option=--log-error=$LOG --rocksdb_allow_to_start_after_corruption=1 +--source include/restart_mysqld_with_option.inc + +--echo # +--echo # Test server crashes on corrupted data and restarts +--echo # +create table t1 ( + pk int not null primary key, + col1 varchar(10) +) engine=rocksdb; + +insert into t1 values (1,1),(2,2),(3,3); + +select * from t1 where pk=1; +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +select * from t1 where pk=1; +--source include/wait_until_disconnected.inc +--let SEARCH_PATTERN=data corruption detected +--source include/search_pattern_in_file.inc +--remove_file $LOG + +--echo # +--echo # The same for scan queries +--echo # + +--source include/start_mysqld_with_option.inc +select * from t1; +set session debug_dbug= "+d,rocksdb_return_status_corrupted"; +--exec echo "wait" > $_expect_file_name +--error 2013 +select * from t1; +--source include/wait_until_disconnected.inc +--let SEARCH_PATTERN=data corruption detected +--source include/search_pattern_in_file.inc +--remove_file $LOG + +--echo # +--echo # Test restart failure. The server is shutdown at this point. 
+--echo # + +# remove flag to ignore corruption +--let $_mysqld_option=--log-error=$LOG +--error 0 +--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO $_mysqld_option +--let SEARCH_PATTERN=The server will exit normally and stop restart attempts +--source include/search_pattern_in_file.inc +--remove_file $LOG + +--echo # +--echo # Remove corruption file and restart cleanly +--echo # + +--exec rm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/#rocksdb/ROCKSDB_CORRUPTED +--source include/start_mysqld_with_option.inc + +drop table t1; + +# Restart mysqld with default options +--source include/restart_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf new file mode 100644 index 00000000000..a43c4617b96 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf @@ -0,0 +1,8 @@ +!include suite/rpl/my.cnf + +[mysqld.1] +binlog_format=row +[mysqld.2] +binlog_format=row +slave_parallel_workers=1 +#rpl_skip_tx_api=ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test new file mode 100644 index 00000000000..e61ba720aaf --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test @@ -0,0 +1,9 @@ +--source include/have_rocksdb.inc +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +create table t (i int primary key auto_increment) engine=rocksdb; + +--source include/autoinc_crash_safe.inc + +--source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf new file mode 100644 index 00000000000..0c0b614039e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf @@ -0,0 +1,8 @@ +!include suite/rpl/my.cnf + +[mysqld.1] +binlog_format=row +[mysqld.2] +binlog_format=row 
+#slave_parallel_workers=1 +#rpl_skip_tx_api=ON diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test new file mode 100644 index 00000000000..56cf93db9d9 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test @@ -0,0 +1,10 @@ +--source include/have_rocksdb.inc +--source include/have_binlog_format_row.inc +--source include/master-slave.inc +--source include/have_partition.inc + +create table t (i int primary key auto_increment) engine=rocksdb partition by key (i) partitions 3; + +--source include/autoinc_crash_safe.inc + +--source include/rpl_end.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt new file mode 100644 index 00000000000..83ed8522e72 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt @@ -0,0 +1 @@ +--binlog-format=row diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test new file mode 100644 index 00000000000..abcae8d98a5 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test @@ -0,0 +1,118 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc +--source include/have_log_bin.inc + +--echo # +--echo # Testing upgrading from server without merges for auto_increment +--echo # to new server with such support. 
+--echo # + +set debug_dbug='+d,myrocks_autoinc_upgrade'; +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; + +delete from t where i > 1; +select * from t; + +select table_name, index_name, auto_increment + from information_schema.rocksdb_ddl where table_name = 't'; + +set debug_dbug='-d,myrocks_autoinc_upgrade'; + +--source include/restart_mysqld.inc + +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; + +select table_name, index_name, auto_increment + from information_schema.rocksdb_ddl where table_name = 't'; + +delete from t where i > 1; + +--source include/restart_mysqld.inc + +insert into t values (); +insert into t values (); +insert into t values (); +select * from t; + +drop table t; + +--echo # +--echo # Testing crash safety of transactions. +--echo # +create table t (i int primary key auto_increment); +insert into t values (); +insert into t values (); +insert into t values (); + +--echo # Before anything +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_before"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +--echo # After engine prepare +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_after_prepare"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc 
+--disable_reconnect +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +--echo # After binlog +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_after_log"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +--echo # After everything +begin; +insert into t values (); +insert into t values (); +set debug_dbug="+d,crash_commit_after"; +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 2013 +commit; +--source include/wait_until_disconnected.inc +--enable_reconnect +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--source include/wait_until_connected_again.inc +--disable_reconnect +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; +select max(i) from t; + +drop table t; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test index 2fe0a2e3c08..b8968590155 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test @@ -64,4 +64,42 @@ SELECT LAST_INSERT_ID(); SELECT a FROM t1 ORDER BY a; DROP TABLE t1; +--echo #--------------------------- +--echo # test large autoincrement values +--echo #--------------------------- +SET auto_increment_increment = 1; +SET auto_increment_offset = 1; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES 
(NULL, 'b'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'c'); +SELECT * FROM t1; +DROP TABLE t1; + +SET auto_increment_increment = 300; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'b'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'c'); +SELECT * FROM t1; +DROP TABLE t1; + +SET auto_increment_offset = 200; +CREATE TABLE t1 (a BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb; +INSERT INTO t1 VALUES (18446744073709551613, 'a'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'b'); +SHOW CREATE TABLE t1; +--error ER_AUTOINC_READ_FAILED +INSERT INTO t1 VALUES (NULL, 'c'); +SELECT * FROM t1; +DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test b/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test deleted file mode 100644 index 375571f705d..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoincrement.test +++ /dev/null @@ -1,3 +0,0 @@ ---source include/have_rocksdb.inc - ---echo # The test checks AUTO_INCREMENT capabilities that are not supported by RocksDB-SE. 
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt new file mode 100644 index 00000000000..7d63dc74bb8 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt @@ -0,0 +1 @@ +--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}}; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test new file mode 100644 index 00000000000..00968aebb62 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test @@ -0,0 +1,61 @@ + +--echo # +--echo # Issue #809: Wrong query result with bloom filters +--echo # + +create table t1 ( + id1 bigint not null, + id2 bigint not null, + id3 varchar(100) not null, + id4 int not null, + id5 int not null, + value bigint, + value2 varchar(100), + primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; + + +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t3(seq int); +insert into t3 +select + 1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 B, t2 C, t2 D; + +insert t1 +select + (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; + +set global rocksdb_force_flush_memtable_now=1; + +--echo # Full table scan +explain +select * from t1 limit 10; +select * from t1 limit 10; + +--echo # An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; + +# A testcase for an assertion that the fix is removing +# The only requirement for the used column family is that it is reverse-ordered +create table t4 ( + pk int unsigned not null primary key, + kp1 int unsigned not null, + kp2 int unsigned not null, + col1 int 
unsigned, + key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; + +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); + +--echo # This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; + +drop table t1,t2,t3,t4; + + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test index 6c6c75dd37e..0db5e6d9cc4 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test @@ -7,4 +7,4 @@ --let pk_cf=cf1 --let data_order_desc=0 ---source bulk_load.inc +--source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test new file mode 100644 index 00000000000..18e40fbf4ab --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test @@ -0,0 +1,19 @@ +--source include/have_rocksdb.inc + +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; + +SET rocksdb_bulk_load_allow_unsorted=1; +SET rocksdb_bulk_load=1; + +INSERT INTO t1 VALUES (1); + +--connect (con1,localhost,root,,) +DROP TABLE t1; + +--connection default +--disconnect con1 + +# This would have crashed the server prior to the fix +SET rocksdb_bulk_load=0; +--error ER_NO_SUCH_TABLE +SELECT * FROM t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test index 4a4c42d1fcd..1e349d0ff18 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test @@ -1,7 +1,13 @@ --source include/have_rocksdb.inc +--source include/count_sessions.inc + +--let LOG1=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.1.err +--let $_mysqld_option=--log-error=$LOG1 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_option.inc ### Bulk load 
### -CREATE TABLE t1(pk INT, PRIMARY KEY(pk)); +CREATE TABLE t1(pk INT, PRIMARY KEY(pk)) ENGINE=ROCKSDB; # Make sure we get an error with out of order keys during bulk load SET rocksdb_bulk_load=1; @@ -21,19 +27,49 @@ INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(20); INSERT INTO t1 VALUES(21); ---echo # ---echo # In MyRocks, the following statement will intentionally crash the server. ---echo # In MariaDB, it will cause an error --error ER_OVERLAPPING_KEYS SET rocksdb_bulk_load=0; ---echo # ---echo # Despite the error, bulk load operation is over so the variable value ---echo # will be 0: -select @@rocksdb_bulk_load; - +SHOW VARIABLES LIKE 'rocksdb_bulk_load'; call mtr.add_suppression('finalizing last SST file while setting bulk loading variable'); +SELECT * FROM t1; + +--let SEARCH_FILE=$LOG1 +--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while setting bulk loading variable +--source include/search_pattern_in_file.inc + +--let LOG2=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.2.err +--let $_mysqld_option=--log-error=$LOG2 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_option.inc +--remove_file $LOG1 + + +# Make sure we get an error in log when we disconnect and do not assert the server +--connect (con1,localhost,root,,) +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(1); +INSERT INTO t1 VALUES(2); +INSERT INTO t1 VALUES(20); +INSERT INTO t1 VALUES(21); +--connection default +--disconnect con1 + +SELECT * FROM t1; + +--source include/wait_until_count_sessions.inc + +--let SEARCH_FILE=$LOG2 +--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while disconnecting +--source include/search_pattern_in_file.inc + +--let LOG3=$MYSQLTEST_VARDIR/tmp/rocksdb.bulk_load_errors.3.err +--let $_mysqld_option=--log-error=$LOG3 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_option.inc +--remove_file $LOG2 + TRUNCATE TABLE t1; ### Bulk load with 
unsorted PKs ### @@ -60,3 +96,46 @@ SELECT * FROM t1; SET rocksdb_bulk_load_allow_unsorted=DEFAULT; DROP TABLE t1; + +# This would trigger a debug assertion that is just an error in release builds +CREATE TABLE t1(c1 INT KEY) ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--error ER_KEYS_OUT_OF_ORDER +INSERT INTO t1 VALUES (),(),(); +SET rocksdb_bulk_load=0; +DROP TABLE t1; + +# Crash when table open cache closes handler with bulk load operation not finalized +SET @orig_table_open_cache=@@global.table_open_cache; +CREATE TABLE t1(a INT AUTO_INCREMENT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB DEFAULT CHARSET=latin1; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES(13, 0); +INSERT INTO t1 VALUES(2, 'test 2'); +INSERT INTO t1 VALUES(@id, @arg04); +SET @@global.table_open_cache=FALSE; +INSERT INTO t1 VALUES(51479+0.333333333,1); +DROP TABLE t1; +SET @@global.table_open_cache=@orig_table_open_cache; + +--let SEARCH_FILE=$LOG3 +--let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing bulk load while closing handler +--source include/search_pattern_in_file.inc + +--source include/restart_mysqld.inc + +--remove_file $LOG3 + +# Switch between tables, but also introduce duplicate key errors +CREATE TABLE t1 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +CREATE TABLE t2 (pk INT, PRIMARY KEY (pk)) ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1), (2); +INSERT INTO t2 VALUES (1), (2); +INSERT INTO t1 VALUES (1); +--error ER_OVERLAPPING_KEYS +INSERT INTO t2 VALUES (3); +SET rocksdb_bulk_load=0; +DROP TABLE t1; +DROP TABLE t2; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test index 7c4d7aef0e5..67d68ac7a2d 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test @@ -6,4 +6,4 @@ --let pk_cf=rev:cf1 --let data_order_desc=0 ---source bulk_load.inc +--source 
../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test index a31e86753f3..7110fe5f1d7 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test @@ -6,4 +6,4 @@ --let pk_cf=rev:cf1 --let data_order_desc=1 ---source bulk_load.inc +--source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test index f36990ed567..6c6e51a2a51 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test @@ -6,4 +6,4 @@ --let pk_cf=cf1 --let data_order_desc=1 ---source bulk_load.inc +--source ../include/bulk_load.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test index 78bb9312ca5..2abeae343c9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test @@ -3,136 +3,4 @@ --let pk_cf=cf1 ---disable_warnings -DROP TABLE IF EXISTS t1; ---enable_warnings - -SET rocksdb_bulk_load_size=3; -SET rocksdb_bulk_load_allow_unsorted=1; - -### Test individual INSERTs ### - -# A table with only a PK won't have rows until the bulk load is finished -eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); -SET rocksdb_bulk_load=1; ---disable_query_log -let $sign = 1; -let $max = 5; -let $i = 1; -while ($i <= $max) { - let $a = 1 + $sign * $i; - let $b = 1 - $sign * $i; - let $sign = -$sign; - let $insert = INSERT INTO t1 VALUES ($a, $b); - eval $insert; - inc $i; -} ---enable_query_log -SELECT * FROM t1; -SET rocksdb_bulk_load=0; -SELECT * FROM t1; -DROP TABLE t1; - -# A table with a PK and a SK shows rows immediately -eval 
CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b)); -SET rocksdb_bulk_load=1; ---disable_query_log -let $sign = 1; -let $max = 5; -let $i = 1; -while ($i <= $max) { - let $a = 1 + $sign * $i; - let $b = 1 - $sign * $i; - let $sign = -$sign; - let $insert = INSERT INTO t1 VALUES ($a, $b); - eval $insert; - inc $i; -} ---enable_query_log - -SELECT * FROM t1; -SET rocksdb_bulk_load=0; -DROP TABLE t1; - -# Inserting into another table finishes bulk load to the previous table -eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); -eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); - -SET rocksdb_bulk_load=1; -INSERT INTO t1 VALUES (1,1); -INSERT INTO t2 VALUES (1,1); -SELECT * FROM t1; -INSERT INTO t1 VALUES (2,2); -SELECT * FROM t2; -SELECT * FROM t1; -SET rocksdb_bulk_load=0; -SELECT * FROM t1; -DROP TABLE t1, t2; - -### Test bulk load from a file ### -eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf"); -eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf"); -eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") - PARTITION BY KEY() PARTITIONS 4; - ---let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` -# Create a text file with data to import into the table. 
-# PK and SK are not in any order ---let ROCKSDB_INFILE = $file -perl; -my $fn = $ENV{'ROCKSDB_INFILE'}; -open(my $fh, '>', $fn) || die "perl open($fn): $!"; -binmode $fh; -my $max = 5000000; -my $sign = 1; -for (my $ii = 0; $ii < $max; $ii++) -{ - my $a = 1 + $sign * $ii; - my $b = 1 - $sign * $ii; - print $fh "$a\t$b\n"; -} -close($fh); -EOF ---file_exists $file - -# Make sure a snapshot held by another user doesn't block the bulk load -connect (other,localhost,root,,); -set session transaction isolation level repeatable read; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; -start transaction with consistent snapshot; -select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; - -connection default; -set rocksdb_bulk_load=1; -set rocksdb_bulk_load_size=100000; ---disable_query_log ---echo LOAD DATA INFILE <input_file> INTO TABLE t1; -eval LOAD DATA INFILE '$file' INTO TABLE t1; ---echo LOAD DATA INFILE <input_file> INTO TABLE t2; -eval LOAD DATA INFILE '$file' INTO TABLE t2; ---echo LOAD DATA INFILE <input_file> INTO TABLE t3; -eval LOAD DATA INFILE '$file' INTO TABLE t3; ---enable_query_log -set rocksdb_bulk_load=0; - ---remove_file $file - -# Make sure row count index stats are correct ---replace_column 6 # 7 # 8 # 9 # -SHOW TABLE STATUS WHERE name LIKE 't%'; - -ANALYZE TABLE t1, t2, t3; - ---replace_column 6 # 7 # 8 # 9 # -SHOW TABLE STATUS WHERE name LIKE 't%'; - -# Make sure all the data is there. 
-select count(a) from t1; -select count(b) from t1; -select count(a) from t2; -select count(b) from t2; -select count(a) from t3; -select count(b) from t3; - -DROP TABLE t1, t2, t3; -SET rocksdb_bulk_load_allow_unsorted=0; +--source ../include/bulk_load_unsorted.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test new file mode 100644 index 00000000000..de9a5c26424 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test @@ -0,0 +1,5 @@ +--source include/have_rocksdb.inc + +--let pk_cf=rev:cf1 + +--source ../include/bulk_load_unsorted.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test index 689753faf8d..14a82d7e462 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/cardinality.test @@ -2,6 +2,48 @@ --source include/restart_mysqld.inc +# Test memtable cardinality statistics +CREATE TABLE t0 (id int PRIMARY KEY, a int, INDEX ix_a (a)) engine=rocksdb; + +# populate the table with 10 records where cardinality of id is N and a is N/2. 
+insert into t0 values (0, 0),(1, 1),(2, 2),(3, 3),(4, 4), +(5, 4),(6, 4),(7, 4),(8, 4),(9, 4); + +# Assert no cardinality data exists before ANALYZE TABLE is done +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="id"; +SELECT cardinality FROM information_schema.statistics where table_name="t0" and +column_name="a"; + +--disable_result_log +ANALYZE TABLE t0; +--enable_result_log + +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; + +# Flush the table and re-run the test as statistics is calculated a bit +# differently for memtable and SST files +SET GLOBAL rocksdb_force_flush_memtable_now = 1; +--disable_result_log +ANALYZE TABLE t0; +--enable_result_log + +SELECT table_rows into @N FROM information_schema.tables +WHERE table_name = "t0"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="id"; +SELECT FLOOR(@N/cardinality) FROM +information_schema.statistics where table_name="t0" and column_name="a"; + +drop table t0; + +# Test big table on SST + --disable_warnings DROP TABLE IF EXISTS t1,t10,t11; --enable_warnings diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test new file mode 100644 index 00000000000..b4866de4d3b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test @@ -0,0 +1,22 @@ +--disable_warnings +let $MYSQLD_DATADIR= `select @@datadir`; +let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; +let $error_log= $MYSQLTEST_VARDIR/log/my_restart.err; +select variable_name, variable_value from information_schema.global_variables where 
variable_name="rocksdb_ignore_unknown_options"; + +--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "echo hello=world>>{}" + +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--shutdown_server 10 + +--error 1 +--exec $MYSQLD_CMD --plugin_load=$HA_ROCKSDB_SO --rocksdb_ignore_unknown_options=0 --loose-console --log-error=$error_log + +let SEARCH_FILE= $error_log; +let SEARCH_PATTERN= RocksDB: Compatibility check against existing database options failed; +--source include/search_pattern_in_file.inc +--enable_reconnect +--exec echo "restart" > $restart_file +--source include/wait_until_connected_again.inc +--exec find $MYSQLD_DATADIR/#rocksdb/OPTIONS* | sort -n | tail -1 | xargs -0 -I {} -t sh -c "sed -i '/hello=world/d' {}" +select variable_name, variable_value from information_schema.global_variables where variable_name="rocksdb_ignore_unknown_options"; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test index d2abcb3b63b..9677d2dbbaa 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test @@ -21,29 +21,29 @@ let $con3= `SELECT CONNECTION_ID()`; connection default; eval create table t (i int primary key) engine=$engine; insert into t values (1), (2), (3); ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; echo Deadlock #1; --source include/simple_deadlock.inc connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} 
[0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; echo Deadlock #2; --source include/simple_deadlock.inc connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; set global rocksdb_max_latest_deadlocks = 10; echo Deadlock #3; --source include/simple_deadlock.inc connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; set global rocksdb_max_latest_deadlocks = 1; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; connection con3; @@ -77,8 +77,10 @@ let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx where thread_id = $con2 and waiting_key != ""; --source include/wait_condition.inc +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; --error ER_LOCK_DEADLOCK select * from t where i=1 for update; +select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks'; rollback; connection con2; @@ -91,7 +93,7 @@ rollback; connection 
default; set global rocksdb_max_latest_deadlocks = 5; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; echo Deadlock #5; @@ -133,7 +135,7 @@ connection con3; rollback; connection default; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ show engine rocksdb transaction status; disconnect con1; @@ -143,11 +145,11 @@ disconnect con3; set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout; set global rocksdb_deadlock_detect = @prior_deadlock_detect; drop table t; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ show engine rocksdb transaction status; set global rocksdb_max_latest_deadlocks = 0; --echo # Clears deadlock buffer of any existent deadlocks. 
set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; ---replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ +--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ show engine rocksdb transaction status; --source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test index 7dc3c207ecc..716f372067b 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test @@ -7,6 +7,7 @@ --disable_warnings DROP TABLE IF EXISTS is_ddl_t1; DROP TABLE IF EXISTS is_ddl_t2; +DROP TABLE IF EXISTS is_ddl_t3; --enable_warnings CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT, @@ -16,9 +17,13 @@ CREATE TABLE is_ddl_t1 (i INT, j INT, k INT, l INT, CREATE TABLE is_ddl_t2 (x INT, y INT, z INT, PRIMARY KEY (z, y) COMMENT 'zy_cf', KEY (x)) ENGINE = ROCKSDB; +CREATE TABLE is_ddl_t3 (a INT, b INT, c INT, PRIMARY KEY (a)) ENGINE = ROCKSDB + COMMENT "ttl_duration=3600;"; + --sorted_result -SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; +SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF,TTL_DURATION,INDEX_FLAGS FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%'; # cleanup DROP TABLE is_ddl_t1; DROP TABLE is_ddl_t2; +DROP TABLE is_ddl_t3; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test new file mode 100644 index 00000000000..21558899782 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test @@ -0,0 +1,158 @@ +--source 
include/have_rocksdb.inc + +set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout; +set @prior_deadlock_detect = @@rocksdb_deadlock_detect; +set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks; +set global rocksdb_deadlock_detect = on; +set global rocksdb_lock_wait_timeout = 10000; +--echo # Clears deadlock buffer of any prior deadlocks. +set global rocksdb_max_latest_deadlocks = 0; +set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; + +# needed by simple_deadlock.inc +let $engine = rocksdb; + +--source include/count_sessions.inc +connect (con1,localhost,root,,); +let $con1= `SELECT CONNECTION_ID()`; + +connect (con2,localhost,root,,); +let $con2= `SELECT CONNECTION_ID()`; + +connect (con3,localhost,root,,); +let $con3= `SELECT CONNECTION_ID()`; + +connection default; +show create table information_schema.rocksdb_deadlock; + +create table t (i int primary key) engine=rocksdb; +insert into t values (1), (2), (3); +select * from information_schema.rocksdb_deadlock; + +echo Deadlock #1; +--source include/simple_deadlock.inc +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +echo Deadlock #2; +--source include/simple_deadlock.inc +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; +set global rocksdb_max_latest_deadlocks = 10; + +echo Deadlock #3; +--source include/simple_deadlock.inc +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; +set global rocksdb_max_latest_deadlocks = 1; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +connection con3; +set rocksdb_deadlock_detect_depth = 2; + +echo Deadlock #4; +connection con1; +begin; +select * from t where i=1 for update; + +connection con2; +begin; +select * from t 
where i=2 for update; + +connection con3; +begin; +select * from t where i=3 for update; + +connection con1; +send select * from t where i=2 for update; + +connection con2; +let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx +where thread_id = $con1 and waiting_key != ""; +--source include/wait_condition.inc + +send select * from t where i=3 for update; + +connection con3; +let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx +where thread_id = $con2 and waiting_key != ""; +--source include/wait_condition.inc + +--error ER_LOCK_DEADLOCK +select * from t where i=1 for update; +rollback; + +connection con2; +reap; +rollback; + +connection con1; +reap; +rollback; + +connection default; +set global rocksdb_max_latest_deadlocks = 5; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +echo Deadlock #5; +connection con1; +begin; +select * from t where i=1 for update; + +connection con2; +begin; +select * from t where i=2 for update; + +connection con3; +begin; +select * from t where i=3 lock in share mode; + +connection con1; +select * from t where i=100 for update; +select * from t where i=101 for update; +send select * from t where i=2 for update; + +connection con2; +let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx +where thread_id = $con1 and waiting_key != ""; +--source include/wait_condition.inc + +select * from t where i=3 lock in share mode; +select * from t where i=200 for update; +select * from t where i=201 for update; + +--error ER_LOCK_DEADLOCK +select * from t where i=1 lock in share mode; +rollback; + +connection con1; +reap; +rollback; + +connection con3; +rollback; + +connection default; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; + +disconnect con1; +disconnect con2; +disconnect con3; + +set global rocksdb_lock_wait_timeout = 
@prior_lock_wait_timeout; +set global rocksdb_deadlock_detect = @prior_deadlock_detect; +drop table t; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY 6 INDEX_NAME 7 TABLE_NAME +select * from information_schema.rocksdb_deadlock; +set global rocksdb_max_latest_deadlocks = 0; +--echo # Clears deadlock buffer of any existent deadlocks. +set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks; +--replace_column 1 DEADLOCK_ID 2 TRANSACTION_ID 4 WAITING_KEY +select * from information_schema.rocksdb_deadlock; +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test index abf8d71911b..887b4dd6a65 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test @@ -70,14 +70,15 @@ while ($cnt) SELECT COUNT(*) FROM t1; +# flush the table first as statistics is calculated a bit differently for memtable and SST files +SET GLOBAL rocksdb_force_flush_memtable_now = 1; + -- disable_query_log -- disable_result_log ANALYZE TABLE t1; -- enable_result_log -- enable_query_log -SET GLOBAL rocksdb_force_flush_memtable_now = 1; - --replace_column 9 # EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100; UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100; @@ -95,8 +96,8 @@ while ($i <= 1000) { eval $insert; } --enable_query_log -analyze table t1; set global rocksdb_force_flush_memtable_now=1; +analyze table t1; --replace_column 9 # explain select * from t1 where key1 = 1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test index a4d26cf7739..2306558ff41 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test @@ -5,7 +5,8 @@ # 
t/index_merge_innodb.test # -# Index merge tests +# Index merge tests (the test is called 'index_merge_rocksdb2' because +# 'index_merge_rocksdb' has already existed before copying 'index_merge_innodb') # # Last update: # 2006-08-07 ML test refactored (MySQL 5.1) @@ -61,6 +62,7 @@ INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1; -- disable_query_log -- disable_result_log +set global rocksdb_force_flush_memtable_now=1; analyze table t1; -- enable_result_log -- enable_query_log diff --git a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test index 2ffc186dd8f..67dae5d6263 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test @@ -19,8 +19,8 @@ drop table t1; --let $max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1) --replace_result $max_index_id max_index_id -select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; -select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO; +select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; +select count(*) from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type<>'DDL_DROP_INDEX_ONGOING'; SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0; select VALUE into @keysIn from INFORMATION_SCHEMA.ROCKSDB_COMPACTION_STATS where CF_NAME = 'default' and LEVEL = 'Sum' and TYPE = 'KeyIn'; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt index a0bf5759ec4..3b7d80662db 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt +++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config-master.opt @@ -1,4 +1,5 @@ --rocksdb_write_disable_wal=1 +--rocksdb_flush_log_at_trx_commit=0 
--rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=256;level0_stop_writes_trigger=256;max_write_buffer_number=16;compression_per_level=kNoCompression;memtable=vector:1024 --rocksdb_override_cf_options=__system__={memtable=skip_list:16} --rocksdb_compaction_sequential_deletes=0 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test index 4f1927d366c..d75f1e3c2a8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test @@ -8,6 +8,38 @@ SHOW TABLE STATUS LIKE 't1'; INSERT INTO t1 VALUES ('538647864786478647864'); --replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SELECT * FROM t1; +SHOW TABLE STATUS LIKE 't1'; + +--error ER_DUP_ENTRY +INSERT INTO t1 VALUES (); +SELECT * FROM t1; +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SHOW TABLE STATUS LIKE 't1'; + +--error ER_DUP_ENTRY +INSERT INTO t1 VALUES (); +SELECT * FROM t1; +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SHOW TABLE STATUS LIKE 't1'; + +DROP TABLE t1; + +CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT); + +INSERT INTO t1 VALUES (5); +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SHOW TABLE STATUS LIKE 't1'; + +INSERT INTO t1 VALUES (1000); +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # +SELECT * FROM t1; +SHOW TABLE STATUS LIKE 't1'; + +--error ER_DUP_ENTRY +INSERT INTO t1 VALUES (); +SELECT * FROM t1; +--replace_column 3 # 6 # 7 # 8 # 9 # 10 # SHOW TABLE STATUS LIKE 't1'; --error ER_DUP_ENTRY diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test index f1777ea3e93..5288680c3bd 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test @@ -16,16 +16,20 @@ set @@rocksdb_lock_wait_timeout=1; begin; --connection con1 
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --error ER_LOCK_WAIT_TIMEOUT insert into t values(0); select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --connection con2 +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --error ER_LOCK_WAIT_TIMEOUT insert into t values(0); select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t"; +select case when variable_value-@a = 1 then 'true' else 'false' end as waits from information_schema.global_status where variable_name='rocksdb_row_lock_wait_timeouts'; --disconnect con1 --connection default diff --git a/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test new file mode 100644 index 00000000000..c7c5e7b2ef3 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test @@ -0,0 +1,53 @@ +--source include/have_rocksdb.inc + +# Basic Sysbench run fails with basic MyROCKS install due to lack of open files + +# test for over limit +CALL mtr.add_suppression("RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit*"); + +--let $over_rocksdb_max_open_files=`SELECT @@global.open_files_limit + 100` +--let $under_rocksdb_max_open_files=`SELECT @@global.open_files_limit -1` +--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rocksdb.max_open_files.err +--let SEARCH_PATTERN=RocksDB: rocksdb_max_open_files should not be greater than the open_files_limit + +--replace_result 
$MYSQLTEST_VARDIR MYSQLTEST_VARDIR $over_rocksdb_max_open_files over_rocksdb_max_open_files +--let $_mysqld_option=--log-error=$SEARCH_FILE --rocksdb_max_open_files=$over_rocksdb_max_open_files +--source include/restart_mysqld_with_option.inc +--source include/search_pattern_in_file.inc + +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; + +# test for within limit +--let $_mysqld_option=--rocksdb_max_open_files=$under_rocksdb_max_open_files +--source include/restart_mysqld_with_option.inc + +SELECT @@global.open_files_limit - 1 = @@global.rocksdb_max_open_files; + +# test for minimal value +--let $_mysqld_option=--rocksdb_max_open_files=0 +--source include/restart_mysqld_with_option.inc + +SELECT @@global.rocksdb_max_open_files; + +# verify that we can still do work with no descriptor cache +CREATE TABLE t1(a INT) ENGINE=ROCKSDB; +INSERT INTO t1 VALUES(0),(1),(2),(3),(4); +SET GLOBAL rocksdb_force_flush_memtable_and_lzero_now=1; +DROP TABLE t1; + +# test for unlimited +--let $_mysqld_option=--rocksdb_max_open_files=-1 +--source include/restart_mysqld_with_option.inc + +SELECT @@global.rocksdb_max_open_files; + +# test for auto-tune +--let $_mysqld_option=--rocksdb_max_open_files=-2 +--source include/restart_mysqld_with_option.inc + +SELECT FLOOR(@@global.open_files_limit / 2) = @@global.rocksdb_max_open_files; + +# cleanup +--let _$mysqld_option= +--source include/restart_mysqld.inc +--remove_file $SEARCH_FILE diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test index 9e904908330..4947ffb59b8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test @@ -32,7 +32,7 @@ BEGIN; insert into r1 values (5,5,5,5,5,5,5,5); update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1'; ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc 
--rocksdb_bulk_load test +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test rollback; @@ -44,16 +44,16 @@ source include/search_pattern_in_file.inc; set @save_default_storage_engine=@@global.default_storage_engine; SET GLOBAL default_storage_engine=rocksdb; ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test source include/search_pattern_in_file.inc; # Sanity test mysqldump when the --innodb-stats-on-metadata is specified (no effect) --echo ==== mysqldump with --innodb-stats-on-metadata ==== ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test # testing mysqldump work with statement based binary logging SET GLOBAL binlog_format=statement; ---exec $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null +--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test > /dev/null SET GLOBAL binlog_format=row; drop table r1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test index 3631e703de6..ca9eb5d2ecf 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test @@ -29,7 +29,7 @@ let $restart_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; select variable_value into @a from information_schema.global_status where variable_name='rocksdb_block_cache_add'; ---exec $MYSQL_DUMP --skip-comments 
--single-transaction --master-data=2 --print-ordering-key --rocksdb test > /dev/null # verifying block cache was not filled select case when variable_value - @a > 20 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_block_cache_add'; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test index 9199c572933..b884738424f 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test @@ -574,7 +574,6 @@ insert into t30 values ('row3', 'row3-key', 'row3-data'), ('row4', 'row4-key', 'row4-data'), ('row5', 'row5-key', 'row5-data'); -analyze table t30; --replace_column 9 # explain @@ -786,11 +785,15 @@ drop table t45; --echo # Now it fails if there is data overlap with what --echo # already exists --echo # +# We exclude rocksdb_max_open_files here because its value is dependent on +# the value of the server's open_files_limit and is expected to be different +# across distros and installs --replace_regex /[a-f0-9]{40}/#/ show variables where variable_name like 'rocksdb%' and + variable_name not like 'rocksdb_max_open_files' and variable_name not like 'rocksdb_supported_compression_types'; create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test new file mode 100644 index 00000000000..7cd4e09e946 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test @@ -0,0 +1,14 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc + +--echo # +--echo # Issue #728: Assertion `covers_key(b)' failed in int +--echo # myrocks::Rdb_key_def::cmp_full_keys(const rocks db::Slice&, +--echo # const rocksdb::Slice&) 
+--echo # + +CREATE TABLE t2(c1 TINYINT SIGNED KEY,c2 TINYINT UNSIGNED,c3 INT); +INSERT INTO t2(c1)VALUES(0); +SELECT * FROM t2 WHERE c1<=127 ORDER BY c1 DESC; +DROP TABLE t2; + diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test index 6b8d0b90e90..a7ac236451e 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test @@ -15,6 +15,7 @@ while ($i<10000) --enable_query_log analyze table t1; select count(*) from t1; +--replace_column 9 # explain select c1 from t1 where c1 > 5 limit 10; drop table t1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test index 945b0079cce..c4321462dfd 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test @@ -1,39 +1,39 @@ --source include/have_rocksdb.inc --source include/have_debug.inc -# Write file to make mysql-test-run.pl expect the "crash", but don't restart the -# server until it is told to --let $_server_id= `SELECT @@server_id` ---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect -CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB; +create table t1 (pk int primary key) engine=rocksdb; # Create a .frm file without a matching table --exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm -# Restart the server with a .frm file exist but that table is not registered in RocksDB ---exec echo "wait" >$_expect_file_name -shutdown_server 10; ---exec echo "restart" >$_expect_file_name ---sleep 5 ---enable_reconnect ---source include/wait_until_connected_again.inc ---disable_reconnect +--source include/restart_mysqld.inc + +show tables; + +# MariaDB produces a warning: +call mtr.add_suppression('Invalid .old.. 
table or database name .t1#sql-test.'); # This will append '#sql-test' to the end of new name set session debug_dbug="+d,gen_sql_table_name"; rename table t1 to t2; set session debug_dbug= "-d,gen_sql_table_name"; +show tables; + # Remove the corresponding .frm files --remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm --remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm # Restart the server with a table registered in RocksDB but does not have a .frm file ---exec echo "wait" >$_expect_file_name -shutdown_server 10; ---exec echo "restart" >$_expect_file_name ---sleep 5 ---enable_reconnect ---source include/wait_until_connected_again.inc ---disable_reconnect +--source include/restart_mysqld.inc + +show tables; + +# try to recreate a table with the same name +create table t2 (pk int primary key) engine=rocksdb; + +show tables; + +drop table t2; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test index 5a694b7b222..4e8b081c4d5 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test @@ -78,22 +78,28 @@ INSERT INTO t1 values (7); set global rocksdb_debug_ttl_rec_ts = 0; # should return nothing. 
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; # disable filtering set global rocksdb_enable_ttl_read_filtering=0; # should return everything +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; -# disable filtering +# enable filtering set global rocksdb_enable_ttl_read_filtering=1; # should return nothing. +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; DROP TABLE t1; @@ -286,28 +292,37 @@ SELECT * FROM t1; # <= shouldn't be filtered out here --echo # Switching to connection 2 connection con2; -# compaction doesn't do anythign since con1 snapshot is still open +# compaction doesn't do anything since con1 snapshot is still open set global rocksdb_force_flush_memtable_now=1; set global rocksdb_compact_cf='default'; # read filtered out, because on a different connection, on # this connection the records have 'expired' already so they are filtered out # even though they have not yet been removed by compaction + +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --echo # Switching to connection 1 connection con1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM 
t1; # <= shouldn't be filtered out here +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; UPDATE t1 set a = a + 1; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result SELECT * FROM t1; # <= shouldn't be filtered out here +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; COMMIT; +select variable_value into @a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; --sorted_result # <= filtered out here because time has passed. SELECT * FROM t1; +select variable_value-@a from information_schema.global_status where variable_name='rocksdb_rows_filtered'; DROP TABLE t1; disconnect con1; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test index e45b6836f67..b631615c266 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test @@ -1,9 +1,5 @@ --source include/have_rocksdb.inc ---disable_warnings -drop table if exists t1,t2; ---enable_warnings - # # VARCHAR column types # @@ -73,3 +69,14 @@ select 'email_i' as index_name, count(*) AS count from t force index(email_i); drop table t; set global rocksdb_checksums_pct = @save_rocksdb_checksums_pct; set session rocksdb_verify_row_debug_checksums = @save_rocksdb_verify_row_debug_checksums; + +# Issue #784 - Skip trailing space bytes for non-unpackable fields + +drop table if exists t; +create table t (h varchar(31) character set utf8 collate utf8_bin not null, i varchar(19) collate latin1_bin not null, primary key(i), key(h)) engine=rocksdb; +insert into t(i,h) values('a','b'); +check table t; +alter table t modify h varchar(31) character set cp1257 collate cp1257_bin not null; +check table t; +drop table t; + diff --git 
a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test index 7a053c659b2..8dfbe312ea8 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test @@ -11,37 +11,51 @@ select plugin_name, plugin_type from information_schema.plugins where plugin_nam # caused an assertion in RocksDB. Now it should not be allowed and ROCKSDB # plugin will not load in such configuration. # -# We want the server to still start, so we specify default-storage-engine=myisam +--let LOG=$MYSQLTEST_VARDIR/tmp/use_direct_reads_writes.err +--let SEARCH_FILE=$LOG ---let $_mysqld_option=--rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1 --default-storage-engine=myisam ---source include/restart_mysqld_with_option.inc +--echo Checking direct reads +--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_reads=1 --rocksdb_allow_mmap_reads=1 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_invalid_option.inc ---echo # Check that ROCKSDB plugin is not loaded: -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; +--let SEARCH_PATTERN=enable both use_direct_reads +--source include/search_pattern_in_file.inc +--remove_file $LOG ---echo # Check that MyRocks has printed an error message into server error log: -let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err; -let SEARCH_PATTERN=enable both use_direct_reads; -source include/search_pattern_in_file.inc; ---echo # Now, restart the server back with regular settings ---source include/restart_mysqld.inc -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; +# Repeat with direct-writes +--echo Checking direct writes +--let $_mysqld_option=--log-error=$LOG --rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1 +--replace_result 
$MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_invalid_option.inc ---echo # ---echo # Now, repeat the same with another set of invalid arguments ---echo # ---let $_mysqld_option=--rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1 --default-storage-engine=myisam ---source include/restart_mysqld_with_option.inc +--let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction +--source include/search_pattern_in_file.inc +--remove_file $LOG ---echo # Check that ROCKSDB plugin is not loaded: -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; -let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err; -let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction; -source include/search_pattern_in_file.inc; +# Verify invalid direct-writes and --rocksdb_flush_log_at_trx_commit combination at startup fails +--echo Checking rocksdb_flush_log_at_trx_commit +--let $_mysqld_option=--log-error=$LOG --rocksdb_flush_log_at_trx_commit=1 --rocksdb_allow_mmap_writes=1 +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--source include/restart_mysqld_with_invalid_option.inc ---echo # Now, restart the server back with regular settings ---source include/restart_mysqld.inc -select plugin_name, plugin_type from information_schema.plugins where plugin_name='RocksDB'; +--let SEARCH_PATTERN=rocksdb_flush_log_at_trx_commit needs to be +--source include/search_pattern_in_file.inc +--remove_file $LOG + + +# Verify rocksdb_flush_log_at_trx_commit cannot be changed if direct writes are used +--echo Validate flush_log settings when direct writes is enabled +--let $_mysqld_option=--rocksdb_flush_log_at_trx_commit=0 --rocksdb_allow_mmap_writes=1 +--source include/restart_mysqld_with_option.inc + +set global rocksdb_flush_log_at_trx_commit=0; +--error ER_WRONG_VALUE_FOR_VAR +set global rocksdb_flush_log_at_trx_commit=1; +--error ER_WRONG_VALUE_FOR_VAR +set global 
rocksdb_flush_log_at_trx_commit=2; +# Cleanup +--source include/restart_mysqld.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test index c20bb1fc89c..e97a0b0bcc9 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test @@ -7,7 +7,8 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true; create table aaa (id int primary key, i int) engine rocksdb; set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit; SET GLOBAL rocksdb_flush_log_at_trx_commit=1; ---exec sleep 5 +insert aaa(id, i) values(0,1); + select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; insert aaa(id, i) values(1,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; @@ -16,18 +17,16 @@ select variable_value-@a from information_schema.global_status where variable_na insert aaa(id, i) values(3,1); select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced'; -SET GLOBAL rocksdb_flush_log_at_trx_commit=0; ---exec sleep 5 select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; +SET GLOBAL rocksdb_flush_log_at_trx_commit=0; insert aaa(id, i) values(4,1); let $status_var=rocksdb_wal_synced; let $status_var_value=`select @a+1`; source include/wait_for_status_var.inc; -SET GLOBAL rocksdb_flush_log_at_trx_commit=2; ---exec sleep 5 select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced'; +SET GLOBAL rocksdb_flush_log_at_trx_commit=2; insert aaa(id, i) values(5,1); let $status_var=rocksdb_wal_synced; diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc index 
8f03c16e2f1..d983bdf8b58 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc @@ -30,6 +30,7 @@ INSERT INTO t1 VALUES(1, 1); connection slave; --let $slave_sql_errno= 1062 --let $not_switch_connection= 0 +--let $slave_timeout= 120 --source include/wait_for_slave_sql_error_and_skip.inc set global reset_seconds_behind_master=0; --source include/stop_slave_io.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result index 3d734c9498d..89e93f6b8f0 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result @@ -10,6 +10,7 @@ insert into r1 values (1, 1000); set global rocksdb_force_flush_memtable_now=1; include/rpl_start_server.inc [server_number=2] include/start_slave.inc +insert into r1 values (2,2000); delete r1 from r1 force index (i) where id2=1000; select id1,id2 from r1 force index (primary) where id1=1 and id2=1000; id1 id2 diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test index 6143824eea6..ff484171213 100644 --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test @@ -62,6 +62,7 @@ SET GLOBAL SYNC_BINLOG = 1; insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush"); +--error 0,2013 SET DEBUG_SYNC='now SIGNAL go'; --source include/wait_until_disconnected.inc --enable_reconnect diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test index 9180afa881f..6d953ead4e9 100644 --- 
a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test @@ -53,8 +53,14 @@ EOF --source include/rpl_start_server.inc --source include/start_slave.inc + +# Due to the binlogs being truncated, the slave may still think it's processed up to +# the truncated binlog and select master_pos_wait() can return prematurely. Add +# a new transaction to the master to force master_pos_wait() to wait. connection master; +insert into r1 values (2,2000); sync_slave_with_master; + connection slave; delete r1 from r1 force index (i) where id2=1000; select id1,id2 from r1 force index (primary) where id1=1 and id2=1000; diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result index 3d76e035e05..9f161b18c05 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result +++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result @@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), +auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, +KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; stop slave; diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result index 3d76e035e05..9f161b18c05 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result +++ b/storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result @@ -9,6 +9,8 @@ zero_sum INT DEFAULT 0, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), +auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, +KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; stop slave; diff --git 
a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py index 20098f49b42..c1d3e7fb81c 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py +++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py @@ -95,6 +95,8 @@ LOADERS_READY = 0 REQUEST_ID = 1 REQUEST_ID_LOCK = threading.Lock() +INSERT_ID_SET = set() + def get_next_request_id(): global REQUEST_ID with REQUEST_ID_LOCK: @@ -302,10 +304,19 @@ class PopulateWorker(WorkerThread): execute(self.cur, stmt) if i % 101 == 0: self.con.commit() + check_id(self.con.insert_id()) self.con.commit() + check_id(self.con.insert_id()) logging.info("Inserted %d rows starting at id %d" % (self.num_to_add, self.start_id)) +def check_id(id): + if id == 0: + return + if id in INSERT_ID_SET: + raise Exception("Duplicate auto_inc id %d" % id) + INSERT_ID_SET.add(id) + def populate_table(num_records): logging.info("Populate_table started for %d records" % num_records) @@ -422,6 +433,7 @@ class LoadGenWorker(WorkerThread): execute(self.cur, gen_insert(self.table, idx, self.thread_id, request_id, 0)) self.con.commit() + check_id(self.con.insert_id()) self.id_map.append(request_id) @@ -687,6 +699,7 @@ class LoadGenWorker(WorkerThread): else: self.cur_txn_state = self.TXN_COMMIT_STARTED self.con.commit() + check_id(self.con.insert_id()) if not self.con.get_server_info(): raise MySQLdb.OperationalError(MySQLdb.constants.CR.CONNECTION_ERROR, "Possible connection error on commit") diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test index 7d92bb3f83a..307211a124d 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test +++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test @@ -17,6 +17,8 @@ CREATE TABLE t1(id INT PRIMARY KEY, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), + auto_inc BIGINT UNSIGNED NOT 
NULL AUTO_INCREMENT, + KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; diff --git a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test index 6f6128579b5..8ef4c73c3b0 100644 --- a/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test +++ b/storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test @@ -18,6 +18,8 @@ CREATE TABLE t1(id INT PRIMARY KEY, msg VARCHAR(1024), msg_length int, msg_checksum varchar(128), + auto_inc BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, + KEY(auto_inc), KEY msg_i(msg(255), zero_sum)) ENGINE=RocksDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result new file mode 100644 index 00000000000..086010dc79e --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result @@ -0,0 +1,7 @@ +SET @start_global_value = @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION; +SELECT @start_global_value; +@start_global_value +0 +"Trying to set variable @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION = 444; +ERROR HY000: Variable 'rocksdb_allow_to_start_after_corruption' is a read only variable diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result index ede02afcb60..9af4f730a21 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result @@ -1,7 +1,85 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); SET @start_global_value = @@global.ROCKSDB_BYTES_PER_SYNC; SELECT @start_global_value; @start_global_value 0 -"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is readonly." 
-SET @@global.ROCKSDB_BYTES_PER_SYNC = 444; -ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a read only variable +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 100" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 100; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +100 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 1" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 1; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 0" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 0; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@session.ROCKSDB_BYTES_PER_SYNC to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_BYTES_PER_SYNC = 444; +ERROR HY000: Variable 'rocksdb_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'aaa'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to 'bbb'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '-1'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = '-1'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '101'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = '101'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_BYTES_PER_SYNC to '484436'" +SET @@global.ROCKSDB_BYTES_PER_SYNC = '484436'; +Got one of the listed errors +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +SET @@global.ROCKSDB_BYTES_PER_SYNC = @start_global_value; +SELECT @@global.ROCKSDB_BYTES_PER_SYNC; +@@global.ROCKSDB_BYTES_PER_SYNC +0 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result deleted file mode 100644 index 905feec9b1a..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_memtable_on_analyze_basic.result +++ /dev/null @@ -1,58 +0,0 @@ -drop table if exists t1; -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE 
TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) NOT NULL AUTO_INCREMENT, - `b` int(11) DEFAULT NULL, - PRIMARY KEY (`a`) -) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); -INSERT INTO t1 (b) VALUES (3); -SELECT * FROM t1; -a b -1 1 -2 2 -3 3 -set session rocksdb_flush_memtable_on_analyze=off; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status OK -SHOW INDEXES FROM t1; -Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment -t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE -set session rocksdb_flush_memtable_on_analyze=on; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status OK -SHOW INDEXES FROM t1; -Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment -t1 0 PRIMARY 1 a A 3 NULL NULL LSMTREE -DROP TABLE t1; -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE TABLE t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) NOT NULL AUTO_INCREMENT, - `b` int(11) DEFAULT NULL, - PRIMARY KEY (`a`) -) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); -INSERT INTO t1 (b) VALUES (3); -SELECT * FROM t1; -a b -1 1 -2 2 -3 3 -SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status OK -SHOW TABLE STATUS LIKE 't1'; -Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 ROCKSDB 10 Fixed # # # 
0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL -DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result new file mode 100644 index 00000000000..621213cd79b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result @@ -0,0 +1,14 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(1024); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +SET @start_global_value = @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS; +SELECT @start_global_value; +@start_global_value +1 +"Trying to set variable @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS to 444. It should fail because it is readonly." +SET @@global.ROCKSDB_IGNORE_UNKNOWN_OPTIONS = 444; +ERROR HY000: Variable 'rocksdb_ignore_unknown_options' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result index b058ebf05f8..60f505310c6 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result @@ -1,7 +1,3 @@ -SET @start_global_value = @@global.ROCKSDB_MAX_OPEN_FILES; -SELECT @start_global_value; -@start_global_value --1 -"Trying to set variable @@global.ROCKSDB_MAX_OPEN_FILES to 444. It should fail because it is readonly." 
-SET @@global.ROCKSDB_MAX_OPEN_FILES = 444; -ERROR HY000: Variable 'rocksdb_max_open_files' is a read only variable +show variables like 'rocksdb_max_open_files'; +Variable_name Value +rocksdb_max_open_files # diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result index e417e4d5c4e..c925a68d4ed 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result @@ -6,11 +6,11 @@ INSERT INTO invalid_values VALUES('\'aaa\''); SET @start_global_value = @@global.ROCKSDB_MAX_ROW_LOCKS; SELECT @start_global_value; @start_global_value -1073741824 +1048576 SET @start_session_value = @@session.ROCKSDB_MAX_ROW_LOCKS; SELECT @start_session_value; @start_session_value -1073741824 +1048576 '# Setting to valid values in global scope#' "Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1" SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1; @@ -21,7 +21,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 "Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 1024" SET @@global.ROCKSDB_MAX_ROW_LOCKS = 1024; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@ -31,7 +31,7 @@ SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; SET @@global.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 '# Setting to valid values in session scope#' "Trying to set variable @@session.ROCKSDB_MAX_ROW_LOCKS to 1" SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1; @@ -42,7 +42,7 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@session.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 "Trying to set variable 
@@session.ROCKSDB_MAX_ROW_LOCKS to 1024" SET @@session.ROCKSDB_MAX_ROW_LOCKS = 1024; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@ -52,21 +52,21 @@ SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; SET @@session.ROCKSDB_MAX_ROW_LOCKS = DEFAULT; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@session.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 '# Testing with invalid values in global scope #' "Trying to set variable @@global.ROCKSDB_MAX_ROW_LOCKS to 'aaa'" SET @@global.ROCKSDB_MAX_ROW_LOCKS = 'aaa'; Got one of the listed errors SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 SET @@global.ROCKSDB_MAX_ROW_LOCKS = @start_global_value; SELECT @@global.ROCKSDB_MAX_ROW_LOCKS; @@global.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 SET @@session.ROCKSDB_MAX_ROW_LOCKS = @start_session_value; SELECT @@session.ROCKSDB_MAX_ROW_LOCKS; @@session.ROCKSDB_MAX_ROW_LOCKS -1073741824 +1048576 DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result index 11d4f2363f6..5a19016bf91 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result @@ -3,12 +3,12 @@ INSERT INTO valid_values VALUES(1); INSERT INTO valid_values VALUES(1024); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; INSERT INTO invalid_values VALUES('\'aaa\''); -SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE; +SET @start_global_value = @@global.ROCKSDB_TWO_WRITE_QUEUES; SELECT @start_global_value; @start_global_value 1 -"Trying to set variable @@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly." 
-SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444; -ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable +"Trying to set variable @@global.ROCKSDB_TWO_WRITE_QUEUES to 444. It should fail because it is readonly." +SET @@global.ROCKSDB_TWO_WRITE_QUEUES = 444; +ERROR HY000: Variable 'rocksdb_two_write_queues' is a read only variable DROP TABLE valid_values; DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result new file mode 100644 index 00000000000..126b4cffe8b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result @@ -0,0 +1,38 @@ +CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options +update_cf1={write_buffer_size=8m;target_file_size_base=2m}; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=""; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m}; +SET 
@@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options update_cf2={write_buffer_size=8m;target_file_size_base=2m}; +DROP TABLE t1; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options update_cf1={write_buffer_size=8m;target_file_size_base=2m}; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +Variable_name Value +rocksdb_update_cf_options diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result index 5ad5394db29..ba24fafd0ec 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result @@ -32,10 +32,19 @@ SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options NULL -SET @@global.rocksdb_update_cf_options = 'aaaaa'; +SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options NULL +SET @@global.rocksdb_update_cf_options = ''; +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options + +SET @@global.rocksdb_update_cf_options = 'aaaaa';; +ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'aaaaa' +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options + SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'; CF_NAME OPTION_TYPE VALUE default WRITE_BUFFER_SIZE 67108864 @@ -100,7 +109,12 @@ 
cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8}; SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE'; CF_NAME OPTION_TYPE VALUE cf1 TARGET_FILE_SIZE_BASE 25165824 -SET @@global.rocksdb_update_cf_options = 'default={foo=bar};'; +SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';; +ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of 'default={foo=bar};' +SELECT @@global.rocksdb_update_cf_options; +@@global.rocksdb_update_cf_options +cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8}; +SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; @@global.rocksdb_update_cf_options NULL diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result index 7da628b73fd..f432f1f7750 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result @@ -1,7 +1,85 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); SET @start_global_value = @@global.ROCKSDB_WAL_BYTES_PER_SYNC; SELECT @start_global_value; @start_global_value 0 -"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is readonly." 
-SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 444; -ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a read only variable +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 100" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 100; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +100 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 1" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 1; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 0" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 0; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = DEFAULT; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@session.ROCKSDB_WAL_BYTES_PER_SYNC to 444. It should fail because it is not session." 
+SET @@session.ROCKSDB_WAL_BYTES_PER_SYNC = 444; +ERROR HY000: Variable 'rocksdb_wal_bytes_per_sync' is a GLOBAL variable and should be set with SET GLOBAL +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'aaa'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to 'bbb'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '-1'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '-1'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '101'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '101'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +"Trying to set variable @@global.ROCKSDB_WAL_BYTES_PER_SYNC to '484436'" +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = '484436'; +Got one of the listed errors +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +SET @@global.ROCKSDB_WAL_BYTES_PER_SYNC = @start_global_value; +SELECT @@global.ROCKSDB_WAL_BYTES_PER_SYNC; +@@global.ROCKSDB_WAL_BYTES_PER_SYNC +0 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test new file mode 100644 index 00000000000..64fb2458424 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test @@ -0,0 +1,6 @@ +--source 
include/have_rocksdb.inc + +--let $sys_var=ROCKSDB_ALLOW_TO_START_AFTER_CORRUPTION +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test index d1d6b2b5695..bf78f578b6c 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test @@ -1,7 +1,22 @@ --source include/have_rocksdb.inc +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); + --let $sys_var=ROCKSDB_BYTES_PER_SYNC ---let $read_only=1 +--let $read_only=0 --let $session=0 --source include/rocksdb_sys_var.inc +DROP TABLE valid_values; +DROP TABLE invalid_values; + diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test deleted file mode 100644 index 574375cd1ea..00000000000 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_memtable_on_analyze_basic.test +++ /dev/null @@ -1,46 +0,0 @@ ---source include/have_rocksdb.inc - ---disable_warnings -drop table if exists t1; ---enable_warnings - -## -## test cardinality for analyze statements after flushing table -## - -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE TABLE t1; -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); 
-INSERT INTO t1 (b) VALUES (3); ---sorted_result -SELECT * FROM t1; - -set session rocksdb_flush_memtable_on_analyze=off; -ANALYZE TABLE t1; -SHOW INDEXES FROM t1; - -set session rocksdb_flush_memtable_on_analyze=on; -ANALYZE TABLE t1; -SHOW INDEXES FROM t1; -DROP TABLE t1; - -## -## test data length for show table status statements for tables with few rows -## - -CREATE TABLE t1 (a INT AUTO_INCREMENT, b INT, PRIMARY KEY(a)) ENGINE=rocksdb; -SHOW CREATE TABLE t1; -INSERT INTO t1 (b) VALUES (1); -INSERT INTO t1 (b) VALUES (2); -INSERT INTO t1 (b) VALUES (3); ---sorted_result -SELECT * FROM t1; - ---replace_column 5 # 6 # 7 # -SHOW TABLE STATUS LIKE 't1'; -ANALYZE TABLE t1; ---replace_column 5 # 6 # 7 # -SHOW TABLE STATUS LIKE 't1'; - -DROP TABLE t1; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test new file mode 100644 index 00000000000..f10ff2c6123 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test @@ -0,0 +1,16 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(1024); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); + +--let $sys_var=ROCKSDB_IGNORE_UNKNOWN_OPTIONS +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test index ba3293264ab..36996761507 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test @@ -1,6 +1,8 
@@ --source include/have_rocksdb.inc ---let $sys_var=ROCKSDB_MAX_OPEN_FILES ---let $read_only=1 ---let $session=0 ---source include/rocksdb_sys_var.inc +# We can not use rocksdb_sys_var.inc here as this is a global, read only option +# whose value is dependent on the servers open_files_limit. It is more fully +# tested in the rocksdb.max_open_files test. + +--replace_column 2 # +show variables like 'rocksdb_max_open_files'; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test index 451653fe769..43579faba82 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test @@ -7,7 +7,7 @@ INSERT INTO valid_values VALUES(1024); CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; INSERT INTO invalid_values VALUES('\'aaa\''); ---let $sys_var=ROCKSDB_CONCURRENT_PREPARE +--let $sys_var=ROCKSDB_TWO_WRITE_QUEUES --let $read_only=1 --let $session=0 --source ../include/rocksdb_sys_var.inc diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test new file mode 100644 index 00000000000..03626260cab --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test @@ -0,0 +1,22 @@ +--source include/have_rocksdb.inc + +CREATE TABLE t1 (a INT, PRIMARY KEY (a) COMMENT 'update_cf1') ENGINE=ROCKSDB; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SELECT @@global.rocksdb_update_cf_options; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=""; 
+SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=NULL; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf2={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +DROP TABLE t1; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS='update_cf1={write_buffer_size=8m;target_file_size_base=2m};'; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; +SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=DEFAULT; +SHOW GLOBAL VARIABLES LIKE 'rocksdb_update_cf_options'; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test index 0e675dafed3..533b2db8204 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test @@ -39,8 +39,17 @@ SELECT @@global.rocksdb_update_cf_options; SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; +# Make sure that we do not double free the NULL string +SET @@global.rocksdb_update_cf_options = NULL; +SELECT @@global.rocksdb_update_cf_options; + +# Attempt setting an empty string +SET @@global.rocksdb_update_cf_options = ''; +SELECT @@global.rocksdb_update_cf_options; + # Will fail to parse. Value not updated. 
-SET @@global.rocksdb_update_cf_options = 'aaaaa'; +--Error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.rocksdb_update_cf_options = 'aaaaa'; SELECT @@global.rocksdb_update_cf_options; SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'; @@ -87,7 +96,11 @@ SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FIL # Will fail to parse. No valid assignments included. Value not updated and # reset to NULL. -SET @@global.rocksdb_update_cf_options = 'default={foo=bar};'; +--Error ER_WRONG_VALUE_FOR_VAR +--eval SET @@global.rocksdb_update_cf_options = 'default={foo=bar};'; +SELECT @@global.rocksdb_update_cf_options; + +SET @@global.rocksdb_update_cf_options = NULL; SELECT @@global.rocksdb_update_cf_options; USE test; diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test index afab0f20d40..9c2a1f4f391 100644 --- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test +++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test @@ -1,6 +1,22 @@ --source include/have_rocksdb.inc +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(100); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +INSERT INTO invalid_values VALUES('\'-1\''); +INSERT INTO invalid_values VALUES('\'101\''); +INSERT INTO invalid_values VALUES('\'484436\''); + --let $sys_var=ROCKSDB_WAL_BYTES_PER_SYNC ---let $read_only=1 +--let $read_only=0 --let $session=0 --source include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; + diff --git a/storage/rocksdb/patch/port/win/io_win.h 
b/storage/rocksdb/patch/port/win/io_win.h deleted file mode 100644 index f5ff253bbaa..00000000000 --- a/storage/rocksdb/patch/port/win/io_win.h +++ /dev/null @@ -1,446 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -#pragma once - -#include <stdint.h> -#include <mutex> -#include <string> - -#include "rocksdb/status.h" -#include "rocksdb/env.h" -#include "util/aligned_buffer.h" - -#include <windows.h> - - -namespace rocksdb { -namespace port { - -std::string GetWindowsErrSz(DWORD err); - -inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) { - return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL)) - ? Status::NoSpace(context, GetWindowsErrSz(err)) - : Status::IOError(context, GetWindowsErrSz(err)); -} - -inline Status IOErrorFromLastWindowsError(const std::string& context) { - return IOErrorFromWindowsError(context, GetLastError()); -} - -inline Status IOError(const std::string& context, int err_number) { - return (err_number == ENOSPC) - ? Status::NoSpace(context, strerror(err_number)) - : Status::IOError(context, strerror(err_number)); -} - -// Note the below two do not set errno because they are used only here in this -// file -// on a Windows handle and, therefore, not necessary. 
Translating GetLastError() -// to errno -// is a sad business -inline int fsync(HANDLE hFile) { - if (!FlushFileBuffers(hFile)) { - return -1; - } - - return 0; -} - -SSIZE_T pwrite(HANDLE hFile, const char* src, size_t numBytes, uint64_t offset); - -SSIZE_T pread(HANDLE hFile, char* src, size_t numBytes, uint64_t offset); - -Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size); - -Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize); - -size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size); - -class WinFileData { - protected: - const std::string filename_; - HANDLE hFile_; - // If ture, the I/O issued would be direct I/O which the buffer - // will need to be aligned (not sure there is a guarantee that the buffer - // passed in is aligned). - const bool use_direct_io_; - - public: - // We want this class be usable both for inheritance (prive - // or protected) and for containment so __ctor and __dtor public - WinFileData(const std::string& filename, HANDLE hFile, bool direct_io) - : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {} - - virtual ~WinFileData() { this->CloseFile(); } - - bool CloseFile() { - bool result = true; - - if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) { - result = ::CloseHandle(hFile_); - assert(result); - hFile_ = NULL; - } - return result; - } - - const std::string& GetName() const { return filename_; } - - HANDLE GetFileHandle() const { return hFile_; } - - bool use_direct_io() const { return use_direct_io_; } - - WinFileData(const WinFileData&) = delete; - WinFileData& operator=(const WinFileData&) = delete; -}; - -class WinSequentialFile : protected WinFileData, public SequentialFile { - - // Override for behavior change when creating a custom env - virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const; - -public: - WinSequentialFile(const std::string& fname, HANDLE f, - const EnvOptions& options); - - 
~WinSequentialFile(); - - WinSequentialFile(const WinSequentialFile&) = delete; - WinSequentialFile& operator=(const WinSequentialFile&) = delete; - - virtual Status Read(size_t n, Slice* result, char* scratch) override; - virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result, - char* scratch) override; - - virtual Status Skip(uint64_t n) override; - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } -}; - -// mmap() based random-access -class WinMmapReadableFile : private WinFileData, public RandomAccessFile { - HANDLE hMap_; - - const void* mapped_region_; - const size_t length_; - - public: - // mapped_region_[0,length-1] contains the mmapped contents of the file. - WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap, - const void* mapped_region, size_t length); - - ~WinMmapReadableFile(); - - WinMmapReadableFile(const WinMmapReadableFile&) = delete; - WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete; - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const override; - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; -}; - -// We preallocate and use memcpy to append new -// data to the file. This is safe since we either properly close the -// file before reading from it, or for log files, the reading code -// knows enough to skip zero suffixes. -class WinMmapFile : private WinFileData, public WritableFile { - private: - HANDLE hMap_; - - const size_t page_size_; // We flush the mapping view in page_size - // increments. 
We may decide if this is a memory - // page size or SSD page size - const size_t - allocation_granularity_; // View must start at such a granularity - - size_t reserved_size_; // Preallocated size - - size_t mapping_size_; // The max size of the mapping object - // we want to guess the final file size to minimize the remapping - size_t view_size_; // How much memory to map into a view at a time - - char* mapped_begin_; // Must begin at the file offset that is aligned with - // allocation_granularity_ - char* mapped_end_; - char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_]) - char* last_sync_; // Where have we synced up to - - uint64_t file_offset_; // Offset of mapped_begin_ in file - - // Do we have unsynced writes? - bool pending_sync_; - - // Can only truncate or reserve to a sector size aligned if - // used on files that are opened with Unbuffered I/O - Status TruncateFile(uint64_t toSize); - - Status UnmapCurrentRegion(); - - Status MapNewRegion(); - - virtual Status PreallocateInternal(uint64_t spaceToReserve); - - public: - WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size, - size_t allocation_granularity, const EnvOptions& options); - - ~WinMmapFile(); - - WinMmapFile(const WinMmapFile&) = delete; - WinMmapFile& operator=(const WinMmapFile&) = delete; - - virtual Status Append(const Slice& data) override; - - // Means Close() will properly take care of truncate - // and it does not need any additional information - virtual Status Truncate(uint64_t size) override; - - virtual Status Close() override; - - virtual Status Flush() override; - - // Flush only data - virtual Status Sync() override; - - /** - * Flush data as well as metadata to stable storage. - */ - virtual Status Fsync() override; - - /** - * Get the size of valid data in the file. This will not match the - * size that is returned from the filesystem because we use mmap - * to extend file by map_size every time. 
- */ - virtual uint64_t GetFileSize() override; - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual Status Allocate(uint64_t offset, uint64_t len) override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; -}; - -class WinRandomAccessImpl { - protected: - WinFileData* file_base_; - size_t alignment_; - - // Override for behavior change when creating a custom env - virtual SSIZE_T PositionedReadInternal(char* src, size_t numBytes, - uint64_t offset) const; - - WinRandomAccessImpl(WinFileData* file_base, size_t alignment, - const EnvOptions& options); - - virtual ~WinRandomAccessImpl() {} - - Status ReadImpl(uint64_t offset, size_t n, Slice* result, - char* scratch) const; - - size_t GetAlignment() const { return alignment_; } - - public: - - WinRandomAccessImpl(const WinRandomAccessImpl&) = delete; - WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete; -}; - -// pread() based random-access -class WinRandomAccessFile - : private WinFileData, - protected WinRandomAccessImpl, // Want to be able to override - // PositionedReadInternal - public RandomAccessFile { - public: - WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, - const EnvOptions& options); - - ~WinRandomAccessFile(); - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; - - virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } - - virtual Status InvalidateCache(size_t offset, size_t length) override; - - virtual size_t GetRequiredBufferAlignment() const override; -}; - -// This is a sequential write class. It has been mimicked (as others) after -// the original Posix class. We add support for unbuffered I/O on windows as -// well -// we utilize the original buffer as an alignment buffer to write directly to -// file with no buffering. 
-// No buffering requires that the provided buffer is aligned to the physical -// sector size (SSD page size) and -// that all SetFilePointer() operations to occur with such an alignment. -// We thus always write in sector/page size increments to the drive and leave -// the tail for the next write OR for Close() at which point we pad with zeros. -// No padding is required for -// buffered access. -class WinWritableImpl { - protected: - WinFileData* file_data_; - const uint64_t alignment_; - uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND - uint64_t reservedsize_; // how far we have reserved space - - virtual Status PreallocateInternal(uint64_t spaceToReserve); - - WinWritableImpl(WinFileData* file_data, size_t alignment); - - ~WinWritableImpl() {} - - - uint64_t GetAlignement() const { return alignment_; } - - Status AppendImpl(const Slice& data); - - // Requires that the data is aligned as specified by - // GetRequiredBufferAlignment() - Status PositionedAppendImpl(const Slice& data, uint64_t offset); - - Status TruncateImpl(uint64_t size); - - Status CloseImpl(); - - Status SyncImpl(); - - uint64_t GetFileNextWriteOffset() { - // Double accounting now here with WritableFileWriter - // and this size will be wrong when unbuffered access is used - // but tests implement their own writable files and do not use - // WritableFileWrapper - // so we need to squeeze a square peg through - // a round hole here. 
- return next_write_offset_; - } - - Status AllocateImpl(uint64_t offset, uint64_t len); - - public: - WinWritableImpl(const WinWritableImpl&) = delete; - WinWritableImpl& operator=(const WinWritableImpl&) = delete; -}; - -class WinWritableFile : private WinFileData, - protected WinWritableImpl, - public WritableFile { - public: - WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment, - size_t capacity, const EnvOptions& options); - - ~WinWritableFile(); - - bool IsSyncThreadSafe() const override { - return true; - } - - virtual Status Append(const Slice& data) override; - - // Requires that the data is aligned as specified by - // GetRequiredBufferAlignment() - virtual Status PositionedAppend(const Slice& data, uint64_t offset) override; - - // Need to implement this so the file is truncated correctly - // when buffered and unbuffered mode - virtual Status Truncate(uint64_t size) override; - - virtual Status Close() override; - - // write out the cached data to the OS cache - // This is now taken care of the WritableFileWriter - virtual Status Flush() override; - - virtual Status Sync() override; - - virtual Status Fsync() override; - - // Indicates if the class makes use of direct I/O - // Use PositionedAppend - virtual bool use_direct_io() const override; - - virtual size_t GetRequiredBufferAlignment() const override; - - virtual uint64_t GetFileSize() override; - - virtual Status Allocate(uint64_t offset, uint64_t len) override; - - virtual size_t GetUniqueId(char* id, size_t max_size) const override; -}; - -class WinRandomRWFile : private WinFileData, - protected WinRandomAccessImpl, - protected WinWritableImpl, - public RandomRWFile { - public: - WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment, - const EnvOptions& options); - - ~WinRandomRWFile() {} - - // Indicates if the class makes use of direct I/O - // If false you must pass aligned buffer to Write() - virtual bool use_direct_io() const override; - - // Use the 
returned alignment value to allocate aligned - // buffer for Write() when use_direct_io() returns true - virtual size_t GetRequiredBufferAlignment() const override; - - // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. - // Pass aligned buffer when use_direct_io() returns true. - virtual Status Write(uint64_t offset, const Slice& data) override; - - // Read up to `n` bytes starting from offset `offset` and store them in - // result, provided `scratch` size should be at least `n`. - // Returns Status::OK() on success. - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const override; - - virtual Status Flush() override; - - virtual Status Sync() override; - - virtual Status Fsync() { return Sync(); } - - virtual Status Close() override; -}; - -class WinDirectory : public Directory { - public: - WinDirectory() {} - - virtual Status Fsync() override; -}; - -class WinFileLock : public FileLock { - public: - explicit WinFileLock(HANDLE hFile) : hFile_(hFile) { - assert(hFile != NULL); - assert(hFile != INVALID_HANDLE_VALUE); - } - - ~WinFileLock(); - - private: - HANDLE hFile_; -}; -} -} diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc index 368e6d88d1c..92396b23f7e 100644 --- a/storage/rocksdb/properties_collector.cc +++ b/storage/rocksdb/properties_collector.cc @@ -54,17 +54,9 @@ Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, : m_cf_id(cf_id), m_ddl_manager(ddl_manager), m_last_stats(nullptr), m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l), m_file_size(0), m_params(params), - m_table_stats_sampling_pct(table_stats_sampling_pct), - m_seed(time(nullptr)), m_card_adj_extra(1.) 
{ + m_cardinality_collector(table_stats_sampling_pct) { DBUG_ASSERT(ddl_manager != nullptr); - // We need to adjust the index cardinality numbers based on the sampling - // rate so that the output of "SHOW INDEX" command will reflect reality - // more closely. It will still be an approximation, just a better one. - if (m_table_stats_sampling_pct > 0) { - m_card_adj_extra = 100. / m_table_stats_sampling_pct; - } - m_deleted_rows_window.resize(m_params.m_window, false); } @@ -147,7 +139,7 @@ Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) { m_last_stats->m_name = m_keydef->get_name(); } } - m_last_key.clear(); + m_cardinality_collector.Reset(); } return m_last_stats; @@ -157,7 +149,7 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key, const rocksdb::Slice &value, const rocksdb::EntryType &type, const uint64_t &file_size) { - const auto stats = AccessStats(key); + auto stats = AccessStats(key); stats->m_data_size += key.size() + value.size(); @@ -183,38 +175,15 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key, sql_print_error("RocksDB: Unexpected entry type found: %u. 
" "This should not happen so aborting the system.", type); - abort_with_stack_traces(); + abort(); break; } stats->m_actual_disk_size += file_size - m_file_size; m_file_size = file_size; - if (m_keydef != nullptr && ShouldCollectStats()) { - std::size_t column = 0; - bool new_key = true; - - if (!m_last_key.empty()) { - rocksdb::Slice last(m_last_key.data(), m_last_key.size()); - new_key = (m_keydef->compare_keys(&last, &key, &column) == 0); - } - - if (new_key) { - DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size()); - - for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) { - stats->m_distinct_keys_per_prefix[i]++; - } - - // assign new last_key for the next call - // however, we only need to change the last key - // if one of the first n-1 columns is different - // If the n-1 prefix is the same, no sense in storing - // the new key - if (column < stats->m_distinct_keys_per_prefix.size()) { - m_last_key.assign(key.data(), key.size()); - } - } + if (m_keydef != nullptr) { + m_cardinality_collector.ProcessKey(key, m_keydef.get(), stats); } } @@ -261,8 +230,10 @@ Rdb_tbl_prop_coll::Finish(rocksdb::UserCollectedProperties *const properties) { rocksdb_num_sst_entry_other += num_sst_entry_other; } - properties->insert({INDEXSTATS_KEY, - Rdb_index_stats::materialize(m_stats, m_card_adj_extra)}); + for (Rdb_index_stats &stat : m_stats) { + m_cardinality_collector.AdjustStats(&stat); + } + properties->insert({INDEXSTATS_KEY, Rdb_index_stats::materialize(m_stats)}); return rocksdb::Status::OK(); } @@ -272,23 +243,6 @@ bool Rdb_tbl_prop_coll::NeedCompact() const { (m_max_deleted_rows > m_params.m_deletes); } -bool Rdb_tbl_prop_coll::ShouldCollectStats() { - // Zero means that we'll use all the keys to update statistics. 
- if (!m_table_stats_sampling_pct || - RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct) { - return true; - } - - const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX - - RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) + - RDB_TBL_STATS_SAMPLE_PCT_MIN; - - DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN); - DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX); - - return val <= m_table_stats_sampling_pct; -} - /* Returns the same as above, but in human-readable way for logging */ @@ -365,8 +319,7 @@ void Rdb_tbl_prop_coll::read_stats_from_tbl_props( Serializes an array of Rdb_index_stats into a network string. */ std::string -Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats, - const float card_adj_extra) { +Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats) { String ret; rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES); for (const auto &i : stats) { @@ -382,8 +335,7 @@ Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats, rdb_netstr_append_uint64(&ret, i.m_entry_merges); rdb_netstr_append_uint64(&ret, i.m_entry_others); for (const auto &num_keys : i.m_distinct_keys_per_prefix) { - const float upd_num_keys = num_keys * card_adj_extra; - rdb_netstr_append_uint64(&ret, static_cast<int64_t>(upd_num_keys)); + rdb_netstr_append_uint64(&ret, num_keys); } } @@ -416,7 +368,7 @@ int Rdb_index_stats::unmaterialize(const std::string &s, sql_print_error("Index stats version %d was outside of supported range. 
" "This should not happen so aborting the system.", version); - abort_with_stack_traces(); + abort(); } size_t needed = sizeof(stats.m_gl_index_id.cf_id) + @@ -521,4 +473,75 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment, } } +Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct) + : m_table_stats_sampling_pct(table_stats_sampling_pct), + m_seed(time(nullptr)) {} + +bool Rdb_tbl_card_coll::IsSampingDisabled() { + // Zero means that we'll use all the keys to update statistics. + return m_table_stats_sampling_pct == 0 || + RDB_TBL_STATS_SAMPLE_PCT_MAX == m_table_stats_sampling_pct; +} + +bool Rdb_tbl_card_coll::ShouldCollectStats() { + if (IsSampingDisabled()) { + return true; // collect every key + } + + const int val = rand_r(&m_seed) % (RDB_TBL_STATS_SAMPLE_PCT_MAX - + RDB_TBL_STATS_SAMPLE_PCT_MIN + 1) + + RDB_TBL_STATS_SAMPLE_PCT_MIN; + + DBUG_ASSERT(val >= RDB_TBL_STATS_SAMPLE_PCT_MIN); + DBUG_ASSERT(val <= RDB_TBL_STATS_SAMPLE_PCT_MAX); + + return val <= m_table_stats_sampling_pct; +} + +void Rdb_tbl_card_coll::ProcessKey(const rocksdb::Slice &key, + const Rdb_key_def *keydef, + Rdb_index_stats *stats) { + if (ShouldCollectStats()) { + std::size_t column = 0; + bool new_key = true; + + if (!m_last_key.empty()) { + rocksdb::Slice last(m_last_key.data(), m_last_key.size()); + new_key = (keydef->compare_keys(&last, &key, &column) == 0); + } + + if (new_key) { + DBUG_ASSERT(column <= stats->m_distinct_keys_per_prefix.size()); + + for (auto i = column; i < stats->m_distinct_keys_per_prefix.size(); i++) { + stats->m_distinct_keys_per_prefix[i]++; + } + + // assign new last_key for the next call + // however, we only need to change the last key + // if one of the first n-1 columns is different + // If the n-1 prefix is the same, no sense in storing + // the new key + if (column < stats->m_distinct_keys_per_prefix.size()) { + m_last_key.assign(key.data(), key.size()); + } + } + } +} + +void 
Rdb_tbl_card_coll::Reset() { m_last_key.clear(); } + +// We need to adjust the index cardinality numbers based on the sampling +// rate so that the output of "SHOW INDEX" command will reflect reality +// more closely. It will still be an approximation, just a better one. +void Rdb_tbl_card_coll::AdjustStats(Rdb_index_stats *stats) { + if (IsSampingDisabled()) { + // no sampling was done, return as stats is + return; + } + for (int64_t &num_keys : stats->m_distinct_keys_per_prefix) { + num_keys = num_keys * 100 / m_table_stats_sampling_pct; + } +} + } // namespace myrocks diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h index 9ae519d95c7..1441d893420 100644 --- a/storage/rocksdb/properties_collector.h +++ b/storage/rocksdb/properties_collector.h @@ -56,8 +56,7 @@ struct Rdb_index_stats { std::vector<int64_t> m_distinct_keys_per_prefix; std::string m_name; // name is not persisted - static std::string materialize(const std::vector<Rdb_index_stats> &stats, - const float card_adj_extra); + static std::string materialize(const std::vector<Rdb_index_stats> &stats); static int unmaterialize(const std::string &s, std::vector<Rdb_index_stats> *const ret); @@ -71,6 +70,40 @@ struct Rdb_index_stats { const int64_t &estimated_data_len = 0); }; +// The helper class to calculate index cardinality +class Rdb_tbl_card_coll { + public: + explicit Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct); + + public: + void ProcessKey(const rocksdb::Slice &key, const Rdb_key_def *keydef, + Rdb_index_stats *stats); + /* + * Resets the state of the collector to start calculating statistics for a + * next index. + */ + void Reset(); + + /* + * Cardinality statistics might be calculated using some sampling strategy. + * This method adjusts gathered statistics according to the sampling + * strategy used. 
Note that adjusted cardinality value is just an estimate + * and can return a value exeeding number of rows in a table, so the + * returned value should be capped by row count before using it by + * an optrimizer or displaying it to a clent. + */ + void AdjustStats(Rdb_index_stats *stats); + + private: + bool ShouldCollectStats(); + bool IsSampingDisabled(); + + private: + std::string m_last_key; + uint8_t m_table_stats_sampling_pct; + unsigned int m_seed; +}; + class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector { public: Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager, @@ -130,9 +163,7 @@ private: uint64_t m_rows, m_window_pos, m_deleted_rows, m_max_deleted_rows; uint64_t m_file_size; Rdb_compact_params m_params; - uint8_t m_table_stats_sampling_pct; - unsigned int m_seed; - float m_card_adj_extra; + Rdb_tbl_card_coll m_cardinality_collector; }; class Rdb_tbl_prop_coll_factory diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc index e608580c666..4d97ab8058d 100644 --- a/storage/rocksdb/rdb_cf_options.cc +++ b/storage/rocksdb/rdb_cf_options.cc @@ -325,6 +325,13 @@ Rdb_cf_options::get_cf_comparator(const std::string &cf_name) { } } +std::shared_ptr<rocksdb::MergeOperator> +Rdb_cf_options::get_cf_merge_operator(const std::string &cf_name) { + return (cf_name == DEFAULT_SYSTEM_CF_NAME) + ? 
std::make_shared<Rdb_system_merge_op>() + : nullptr; +} + void Rdb_cf_options::get_cf_options(const std::string &cf_name, rocksdb::ColumnFamilyOptions *const opts) { DBUG_ASSERT(opts != nullptr); @@ -334,6 +341,7 @@ void Rdb_cf_options::get_cf_options(const std::string &cf_name, // Set the comparator according to 'rev:' opts->comparator = get_cf_comparator(cf_name); + opts->merge_operator = get_cf_merge_operator(cf_name); } } // namespace myrocks diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h index 32f2308284f..19e5da6a79e 100644 --- a/storage/rocksdb/rdb_cf_options.h +++ b/storage/rocksdb/rdb_cf_options.h @@ -64,6 +64,9 @@ public: static const rocksdb::Comparator * get_cf_comparator(const std::string &cf_name); + std::shared_ptr<rocksdb::MergeOperator> + get_cf_merge_operator(const std::string &cf_name); + void get_cf_options(const std::string &cf_name, rocksdb::ColumnFamilyOptions *const opts) MY_ATTRIBUTE((__nonnull__)); diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h index 9bc7ece6e7a..20ae3c740c1 100644 --- a/storage/rocksdb/rdb_compact_filter.h +++ b/storage/rocksdb/rdb_compact_filter.h @@ -167,7 +167,7 @@ public: sql_print_error("Decoding ttl from PK value failed in compaction filter, " "for index (%u,%u), val: %s", m_prev_index.cf_id, m_prev_index.index_id, buf.c_str()); - abort_with_stack_traces(); + abort(); } /* diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index 01dc2d6b176..a38711e6c6b 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -32,6 +32,7 @@ #include <limits> #include <map> #include <set> +#include <string> #include <utility> #include <vector> @@ -826,6 +827,25 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) { return changed; } +/* + @return Number of bytes that were changed +*/ +int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint &len) { + DBUG_ASSERT(packed_tuple 
!= nullptr); + + int changed = 0; + uchar *p = packed_tuple + len - 1; + for (; p > packed_tuple; p--) { + changed++; + if (*p != uchar(0x00)) { + *p = *p - 1; + break; + } + *p = 0xFF; + } + return changed; +} + static const std::map<char, size_t> UNPACK_HEADER_SIZES = { {RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE}, {RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}}; @@ -1429,11 +1449,11 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, MY_BITMAP covered_bitmap; my_bitmap_map covered_bits; uint curr_bitmap_pos = 0; - bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false); const bool has_covered_bitmap = has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG); if (has_covered_bitmap) { + bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false); covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header + sizeof(RDB_UNPACK_COVERED_DATA_TAG) + RDB_UNPACK_COVERED_DATA_LEN_SIZE); @@ -1508,6 +1528,18 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, } if ((this->*fpi->m_skip_func)(fpi, field, &reader)) return HA_ERR_ROCKSDB_CORRUPT_DATA; + + // If this is a space padded varchar, we need to skip the indicator + // bytes for trailing bytes. They're useless since we can't restore the + // field anyway. + // + // There is a special case for prefixed varchars where we do not + // generate unpack info, because we know prefixed varchars cannot be + // unpacked. In this case, it is not necessary to skip. + if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad && + !fpi->m_unpack_info_stores_value) { + unp_reader.read(fpi->m_unpack_info_uses_two_bytes ? 
2 : 1); + } } } @@ -3487,6 +3519,20 @@ void Rdb_tbl_def::set_name(const std::string &name) { check_if_is_mysql_system_table(); } +GL_INDEX_ID Rdb_tbl_def::get_autoincr_gl_index_id() { + for (uint i = 0; i < m_key_count; i++) { + auto &k = m_key_descr_arr[i]; + if (k->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY || + k->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY) { + return k->get_gl_index_id(); + } + } + + // Every table must have a primary key, even if it's hidden. + abort(); + return GL_INDEX_ID(); +} + /* Static function of type my_hash_get_key that gets invoked by the m_ddl_hash object of type my_core::HASH. @@ -3714,6 +3760,68 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir, } /* + Validate that all auto increment values in the data dictionary are on a + supported version. +*/ +bool Rdb_ddl_manager::validate_auto_incr() { + std::unique_ptr<rocksdb::Iterator> it(m_dict->new_iterator()); + + uchar auto_incr_entry[Rdb_key_def::INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(auto_incr_entry, Rdb_key_def::AUTO_INC); + const rocksdb::Slice auto_incr_entry_slice( + reinterpret_cast<char *>(auto_incr_entry), + Rdb_key_def::INDEX_NUMBER_SIZE); + for (it->Seek(auto_incr_entry_slice); it->Valid(); it->Next()) { + const rocksdb::Slice key = it->key(); + const rocksdb::Slice val = it->value(); + GL_INDEX_ID gl_index_id; + + if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE && + memcmp(key.data(), auto_incr_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) + break; + + if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3) { + return false; + } + + if (val.size() <= Rdb_key_def::VERSION_SIZE) { + return false; + } + + // Check if we have orphaned entries for whatever reason by cross + // referencing ddl entries. 
+ auto ptr = reinterpret_cast<const uchar *>(key.data()); + ptr += Rdb_key_def::INDEX_NUMBER_SIZE; + rdb_netbuf_read_gl_index(&ptr, &gl_index_id); + if (!m_dict->get_index_info(gl_index_id, nullptr)) { + // NO_LINT_DEBUG + sql_print_warning("RocksDB: AUTOINC mismatch - " + "Index number (%u, %u) found in AUTOINC " + "but does not exist as a DDL entry", + gl_index_id.cf_id, gl_index_id.index_id); + return false; + } + + ptr = reinterpret_cast<const uchar *>(val.data()); + const int version = rdb_netbuf_read_uint16(&ptr); + if (version > Rdb_key_def::AUTO_INCREMENT_VERSION) { + // NO_LINT_DEBUG + sql_print_warning("RocksDB: AUTOINC mismatch - " + "Index number (%u, %u) found in AUTOINC " + "is on unsupported version %d", + gl_index_id.cf_id, gl_index_id.index_id, version); + return false; + } + } + + if (!it->status().ok()) { + return false; + } + + return true; +} + +/* Validate that all the tables in the RocksDB database dictionary match the .frm files in the datadir */ @@ -3877,10 +3985,18 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg, If validate_tables is greater than 0 run the validation. Only fail the initialzation if the setting is 1. If the setting is 2 we continue. 
*/ - if (validate_tables > 0 && !validate_schemas()) { - if (validate_tables == 1) { - sql_print_error("RocksDB: Problems validating data dictionary " - "against .frm files, exiting"); + if (validate_tables > 0) { + std::string msg; + if (!validate_schemas()) { + msg = "RocksDB: Problems validating data dictionary " + "against .frm files, exiting"; + } else if (!validate_auto_incr()) { + msg = "RocksDB: Problems validating auto increment values in " + "data dictionary, exiting"; + } + if (validate_tables == 1 && !msg.empty()) { + // NO_LINT_DEBUG + sql_print_error("%s", msg.c_str()); return true; } } @@ -4154,6 +4270,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to, new_rec->m_auto_incr_val = rec->m_auto_incr_val.load(std::memory_order_relaxed); new_rec->m_key_descr_arr = rec->m_key_descr_arr; + // so that it's not free'd when deleting the old rec rec->m_key_descr_arr = nullptr; @@ -4613,13 +4730,16 @@ void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch, const GL_INDEX_ID &gl_index_id) const { delete_with_prefix(batch, Rdb_key_def::INDEX_INFO, gl_index_id); delete_with_prefix(batch, Rdb_key_def::INDEX_STATISTICS, gl_index_id); + delete_with_prefix(batch, Rdb_key_def::AUTO_INC, gl_index_id); } bool Rdb_dict_manager::get_index_info( const GL_INDEX_ID &gl_index_id, struct Rdb_index_info *const index_info) const { - index_info->m_gl_index_id = gl_index_id; + if (index_info) { + index_info->m_gl_index_id = gl_index_id; + } bool found = false; bool error = false; @@ -4630,6 +4750,10 @@ bool Rdb_dict_manager::get_index_info( const rocksdb::Status &status = get_value(key, &value); if (status.ok()) { + if (!index_info) { + return true; + } + const uchar *const val = (const uchar *)value.c_str(); const uchar *ptr = val; index_info->m_index_dict_version = rdb_netbuf_to_uint16(val); @@ -4668,6 +4792,11 @@ bool Rdb_dict_manager::get_index_info( index_info->m_kv_version = rdb_netbuf_to_uint16(ptr); ptr += RDB_SIZEOF_KV_VERSION; 
index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr); + if ((index_info->m_kv_version == + Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) && + index_info->m_ttl_duration > 0) { + index_info->m_index_flags = Rdb_key_def::TTL_FLAG; + } found = true; break; @@ -4709,7 +4838,7 @@ bool Rdb_dict_manager::get_index_info( "and it may be a bug.", index_info->m_index_dict_version, index_info->m_index_type, index_info->m_kv_version, index_info->m_ttl_duration); - abort_with_stack_traces(); + abort(); } return found; @@ -4972,7 +5101,7 @@ void Rdb_dict_manager::resume_drop_indexes() const { "bug.", max_index_id_in_dict, gl_index_id.cf_id, gl_index_id.index_id); - abort_with_stack_traces(); + abort(); } } } @@ -5021,7 +5150,7 @@ void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id, "from index id (%u,%u). MyRocks data dictionary may " "get corrupted.", gl_index_id.cf_id, gl_index_id.index_id); - abort_with_stack_traces(); + abort(); } } } @@ -5079,7 +5208,7 @@ void Rdb_dict_manager::add_stats( // IndexStats::materialize takes complete care of serialization including // storing the version const auto value = - Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it}, 1.); + Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it}); batch->Put(m_system_cfh, rocksdb::Slice((char *)key_buf, sizeof(key_buf)), value); @@ -5105,6 +5234,53 @@ Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const { return Rdb_index_stats(); } +rocksdb::Status +Rdb_dict_manager::put_auto_incr_val(rocksdb::WriteBatchBase *batch, + const GL_INDEX_ID &gl_index_id, + ulonglong val, bool overwrite) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; + dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id); + const rocksdb::Slice key = + rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)); + + // Value is constructed by storing the version and the value. 
+ uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION + + ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0}; + uchar *ptr = value_buf; + rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION); + ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION; + rdb_netbuf_store_uint64(ptr, val); + ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE; + const rocksdb::Slice value = + rocksdb::Slice(reinterpret_cast<char *>(value_buf), ptr - value_buf); + + if (overwrite) { + return batch->Put(m_system_cfh, key, value); + } + return batch->Merge(m_system_cfh, key, value); +} + +bool Rdb_dict_manager::get_auto_incr_val(const GL_INDEX_ID &gl_index_id, + ulonglong *new_val) const { + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0}; + dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id); + + std::string value; + const rocksdb::Status status = get_value( + rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)), + &value); + + if (status.ok()) { + const uchar *const val = reinterpret_cast<const uchar *>(value.data()); + + if (rdb_netbuf_to_uint16(val) <= Rdb_key_def::AUTO_INCREMENT_VERSION) { + *new_val = rdb_netbuf_to_uint64(val + RDB_SIZEOF_AUTO_INCREMENT_VERSION); + return true; + } + } + return false; +} + uint Rdb_seq_generator::get_and_update_next_number( Rdb_dict_manager *const dict) { DBUG_ASSERT(dict != nullptr); diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 223f61edb43..f97c0d08d29 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -138,6 +138,7 @@ const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16); const size_t RDB_SIZEOF_INDEX_TYPE = sizeof(uchar); const size_t RDB_SIZEOF_KV_VERSION = sizeof(uint16); const size_t RDB_SIZEOF_INDEX_FLAGS = sizeof(uint32); +const size_t RDB_SIZEOF_AUTO_INCREMENT_VERSION = sizeof(uint16); // Possible return values for rdb_index_field_unpack_t functions. 
enum { @@ -237,17 +238,44 @@ public: *size = INDEX_NUMBER_SIZE; } + /* + Get the first key that you need to position at to start iterating. + + Stores into *key a "supremum" or "infimum" key value for the index. + + @return Number of bytes in the key that are usable for bloom filter use. + */ + inline int get_first_key(uchar *const key, uint *const size) const { + if (m_is_reverse_cf) + get_supremum_key(key, size); + else + get_infimum_key(key, size); + + /* Find out how many bytes of infimum are the same as m_index_number */ + uchar unmodified_key[INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(unmodified_key, m_index_number); + int i; + for (i = 0; i < INDEX_NUMBER_SIZE; i++) { + if (key[i] != unmodified_key[i]) + break; + } + return i; + } + /* Make a key that is right after the given key. */ static int successor(uchar *const packed_tuple, const uint &len); + /* Make a key that is right before the given key. */ + static int predecessor(uchar *const packed_tuple, const uint &len); + /* This can be used to compare prefixes. if X is a prefix of Y, then we consider that X = Y. */ // b describes the lookup key, which can be a prefix of a. + // b might be outside of the index_number range, if successor() is called. 
int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const { DBUG_ASSERT(covers_key(a)); - DBUG_ASSERT(covers_key(b)); return memcmp(a.data(), b.data(), std::min(a.size(), b.size())); } @@ -383,6 +411,7 @@ public: INDEX_STATISTICS = 6, MAX_INDEX_ID = 7, DDL_CREATE_INDEX_ONGOING = 8, + AUTO_INC = 9, END_DICT_INDEX_ID = 255 }; @@ -395,6 +424,7 @@ public: DDL_DROP_INDEX_ONGOING_VERSION = 1, MAX_INDEX_ID_VERSION = 1, DDL_CREATE_INDEX_ONGOING_VERSION = 1, + AUTO_INCREMENT_VERSION = 1, // Version for index stats is stored in IndexStats struct }; @@ -968,17 +998,17 @@ public: Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete; explicit Rdb_tbl_def(const std::string &name) - : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { + : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(name); } Rdb_tbl_def(const char *const name, const size_t &len) - : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { + : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(std::string(name, len)); } explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0) - : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { + : m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) { set_name(std::string(slice.data() + pos, slice.size() - pos)); } @@ -991,7 +1021,7 @@ public: std::shared_ptr<Rdb_key_def> *m_key_descr_arr; std::atomic<longlong> m_hidden_pk_val; - std::atomic<longlong> m_auto_incr_val; + std::atomic<ulonglong> m_auto_incr_val; /* Is this a system table */ bool m_is_mysql_system_table; @@ -1003,6 +1033,7 @@ public: const std::string &base_dbname() const { return m_dbname; } const std::string &base_tablename() const { return m_tablename; } const std::string &base_partition() const { return m_partition; } + GL_INDEX_ID get_autoincr_gl_index_id(); }; /* @@ -1115,6 +1146,8 @@ private: static void free_hash_elem(void *const data); bool validate_schemas(); + + 
bool validate_auto_incr(); }; /* @@ -1179,8 +1212,9 @@ private: 2. internal cf_id, index id => index information key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id - value: version, index_type, kv_format_version, ttl_duration + value: version, index_type, kv_format_version, index_flags, ttl_duration index_type is 1 byte, version and kv_format_version are 2 bytes. + index_flags is 4 bytes. ttl_duration is 8 bytes. 3. CF id => CF flags @@ -1209,6 +1243,11 @@ private: key: Rdb_key_def::DDL_CREATE_INDEX_ONGOING(0x8) + cf_id + index_id value: version + 9. auto_increment values + key: Rdb_key_def::AUTO_INC(0x9) + cf_id + index_id + value: version, {max auto_increment so far} + max auto_increment is 8 bytes + Data dictionary operations are atomic inside RocksDB. For example, when creating a table with two indexes, it is necessary to call Put three times. They have to be atomic. Rdb_dict_manager has a wrapper function @@ -1350,6 +1389,13 @@ public: void add_stats(rocksdb::WriteBatch *const batch, const std::vector<Rdb_index_stats> &stats) const; Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const; + + rocksdb::Status put_auto_incr_val(rocksdb::WriteBatchBase *batch, + const GL_INDEX_ID &gl_index_id, + ulonglong val, + bool overwrite = false) const; + bool get_auto_incr_val(const GL_INDEX_ID &gl_index_id, + ulonglong *new_val) const; }; struct Rdb_index_info { @@ -1361,6 +1407,109 @@ struct Rdb_index_info { uint64 m_ttl_duration = 0; }; +/* + @brief + Merge Operator for the auto_increment value in the system_cf + + @detail + This class implements the rocksdb Merge Operator for auto_increment values + that are stored to the data dictionary every transaction. + + The actual Merge function is triggered on compaction, memtable flushes, or + when get() is called on the same key. 
+ + */ +class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator { + public: + /* + Updates the new value associated with a key to be the maximum of the + passed in value and the existing value. + + @param[IN] key + @param[IN] existing_value existing value for a key; nullptr if nonexistent + key + @param[IN] value + @param[OUT] new_value new value after Merge + @param[IN] logger + */ + bool Merge(const rocksdb::Slice &key, const rocksdb::Slice *existing_value, + const rocksdb::Slice &value, std::string *new_value, + rocksdb::Logger *logger) const override { + DBUG_ASSERT(new_value != nullptr); + + if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 || + GetKeyType(key) != Rdb_key_def::AUTO_INC || + value.size() != + RDB_SIZEOF_AUTO_INCREMENT_VERSION + ROCKSDB_SIZEOF_AUTOINC_VALUE || + GetVersion(value) > Rdb_key_def::AUTO_INCREMENT_VERSION) { + abort(); + } + + uint64_t merged_value = Deserialize(value); + + if (existing_value != nullptr) { + if (existing_value->size() != RDB_SIZEOF_AUTO_INCREMENT_VERSION + + ROCKSDB_SIZEOF_AUTOINC_VALUE || + GetVersion(*existing_value) > Rdb_key_def::AUTO_INCREMENT_VERSION) { + abort(); + } + + merged_value = std::max(merged_value, Deserialize(*existing_value)); + } + Serialize(merged_value, new_value); + return true; + } + + virtual const char *Name() const override { return "Rdb_system_merge_op"; } + + private: + /* + Serializes the integer data to the new_value buffer or the target buffer + the merge operator will update to + */ + void Serialize(const uint64_t data, std::string *new_value) const { + uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION + + ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0}; + uchar *ptr = value_buf; + /* fill in the auto increment version */ + rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION); + ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION; + /* fill in the auto increment value */ + rdb_netbuf_store_uint64(ptr, data); + ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE; + 
new_value->assign(reinterpret_cast<char *>(value_buf), ptr - value_buf); + } + + /* + Gets the value of auto_increment type in the data dictionary from the + value slice + + @Note Only to be used on data dictionary keys for the auto_increment type + */ + uint64_t Deserialize(const rocksdb::Slice &s) const { + return rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(s.data()) + + RDB_SIZEOF_AUTO_INCREMENT_VERSION); + } + + /* + Gets the type of the key of the key in the data dictionary. + + @Note Only to be used on data dictionary keys for the auto_increment type + */ + uint16_t GetKeyType(const rocksdb::Slice &s) const { + return rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(s.data())); + } + + /* + Gets the version of the auto_increment value in the data dictionary. + + @Note Only to be used on data dictionary value for the auto_increment type + */ + uint16_t GetVersion(const rocksdb::Slice &s) const { + return rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(s.data())); + } +}; + bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs); } // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc index 8d801dd430b..de33d69ddb5 100644 --- a/storage/rocksdb/rdb_i_s.cc +++ b/storage/rocksdb/rdb_i_s.cc @@ -797,7 +797,7 @@ static int rdb_i_s_global_info_fill_table( "from CF with id = %u. 
MyRocks data dictionary may " "be corrupted.", cf_handle->GetID()); - abort_with_stack_traces(); + abort(); } snprintf(cf_id_buf, INT_BUF_LEN, "%u", cf_handle->GetID()); @@ -928,7 +928,10 @@ enum { INDEX_NUMBER, INDEX_TYPE, KV_FORMAT_VERSION, - CF + TTL_DURATION, + INDEX_FLAGS, + CF, + AUTO_INCREMENT }; } // namespace RDB_DDL_FIELD @@ -943,7 +946,11 @@ static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = { ROCKSDB_FIELD_INFO("INDEX_TYPE", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0), ROCKSDB_FIELD_INFO("KV_FORMAT_VERSION", sizeof(uint16_t), MYSQL_TYPE_SHORT, 0), + ROCKSDB_FIELD_INFO("TTL_DURATION", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0), + ROCKSDB_FIELD_INFO("INDEX_FLAGS", sizeof(uint64), MYSQL_TYPE_LONGLONG, 0), ROCKSDB_FIELD_INFO("CF", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("AUTO_INCREMENT", sizeof(uint64_t), MYSQL_TYPE_LONGLONG, + MY_I_S_MAYBE_NULL | MY_I_S_UNSIGNED), ROCKSDB_FIELD_INFO_END}; int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { @@ -954,6 +961,7 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { DBUG_ASSERT(m_table != nullptr); Field **field = m_table->field; DBUG_ASSERT(field != nullptr); + const Rdb_dict_manager *dict_manager = rdb_get_dict_manager(); const std::string &dbname = tdef->base_dbname(); field[RDB_DDL_FIELD::TABLE_SCHEMA]->store(dbname.c_str(), dbname.size(), @@ -984,10 +992,20 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) { field[RDB_DDL_FIELD::INDEX_TYPE]->store(kd.m_index_type, true); field[RDB_DDL_FIELD::KV_FORMAT_VERSION]->store(kd.m_kv_format_version, true); + field[RDB_DDL_FIELD::TTL_DURATION]->store(kd.m_ttl_duration, true); + field[RDB_DDL_FIELD::INDEX_FLAGS]->store(kd.m_index_flags_bitmap, true); std::string cf_name = kd.get_cf()->GetName(); field[RDB_DDL_FIELD::CF]->store(cf_name.c_str(), cf_name.size(), system_charset_info); + ulonglong auto_incr; + if (dict_manager->get_auto_incr_val(tdef->get_autoincr_gl_index_id(), + &auto_incr)) { + field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_notnull(); + 
field[RDB_DDL_FIELD::AUTO_INCREMENT]->store(auto_incr, true); + } else { + field[RDB_DDL_FIELD::AUTO_INCREMENT]->set_null(); + } ret = my_core::schema_table_store_record(m_thd, m_table); if (ret) @@ -1495,6 +1513,117 @@ static int rdb_i_s_trx_info_init(void *const p) { DBUG_RETURN(0); } +/* + Support for INFORMATION_SCHEMA.ROCKSDB_DEADLOCK dynamic table + */ +namespace RDB_DEADLOCK_FIELD { +enum { + DEADLOCK_ID = 0, + TRANSACTION_ID, + CF_NAME, + WAITING_KEY, + LOCK_TYPE, + INDEX_NAME, + TABLE_NAME, + ROLLED_BACK +}; +} // namespace RDB_DEADLOCK_FIELD + +static ST_FIELD_INFO rdb_i_s_deadlock_info_fields_info[] = { + ROCKSDB_FIELD_INFO("DEADLOCK_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("WAITING_KEY", FN_REFLEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("LOCK_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("INDEX_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("TABLE_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0), + ROCKSDB_FIELD_INFO("ROLLED_BACK", sizeof(ulonglong), MYSQL_TYPE_LONGLONG, + 0), + ROCKSDB_FIELD_INFO_END}; + +/* Fill the information_schema.rocksdb_deadlock virtual table */ +static int rdb_i_s_deadlock_info_fill_table( + my_core::THD *const thd, my_core::TABLE_LIST *const tables, + my_core::Item *const cond MY_ATTRIBUTE((__unused__))) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(tables != nullptr); + DBUG_ASSERT(tables->table != nullptr); + DBUG_ASSERT(tables->table->field != nullptr); + + static const std::string str_exclusive("EXCLUSIVE"); + static const std::string str_shared("SHARED"); + + int ret = 0; + rocksdb::DB *const rdb = rdb_get_rocksdb_db(); + + if (!rdb) { + DBUG_RETURN(ret); + } + + const std::vector<Rdb_deadlock_info> &all_dl_info = rdb_get_deadlock_info(); + + ulonglong id = 0; + for (const auto &info : 
all_dl_info) { + for (const auto &trx_info : info.path) { + tables->table->field[RDB_DEADLOCK_FIELD::DEADLOCK_ID]->store(id, true); + tables->table->field[RDB_DEADLOCK_FIELD::TRANSACTION_ID]->store( + trx_info.trx_id, true); + tables->table->field[RDB_DEADLOCK_FIELD::CF_NAME]->store( + trx_info.cf_name.c_str(), trx_info.cf_name.length(), + system_charset_info); + tables->table->field[RDB_DEADLOCK_FIELD::WAITING_KEY]->store( + trx_info.waiting_key.c_str(), trx_info.waiting_key.length(), + system_charset_info); + if (trx_info.exclusive_lock) { + tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store( + str_exclusive.c_str(), str_exclusive.length(), system_charset_info); + } else { + tables->table->field[RDB_DEADLOCK_FIELD::LOCK_TYPE]->store( + str_shared.c_str(), str_shared.length(), system_charset_info); + } + tables->table->field[RDB_DEADLOCK_FIELD::INDEX_NAME]->store( + trx_info.index_name.c_str(), trx_info.index_name.length(), + system_charset_info); + tables->table->field[RDB_DEADLOCK_FIELD::TABLE_NAME]->store( + trx_info.table_name.c_str(), trx_info.table_name.length(), + system_charset_info); + tables->table->field[RDB_DEADLOCK_FIELD::ROLLED_BACK]->store( + trx_info.trx_id == info.victim_trx_id, true); + + /* Store the row; on failure stop the whole scan, not only this path */ + ret = static_cast<int>( + my_core::schema_table_store_record(thd, tables->table)); + + if (ret != 0) { + DBUG_RETURN(ret); + } + } + id++; + } + + DBUG_RETURN(ret); +} + +/* Initialize the information_schema.rocksdb_deadlock virtual table */ +static int rdb_i_s_deadlock_info_init(void *const p) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(p != nullptr); + + my_core::ST_SCHEMA_TABLE *schema; + + schema = (my_core::ST_SCHEMA_TABLE *)p; + + schema->fields_info = rdb_i_s_deadlock_info_fields_info; + schema->fill_table = rdb_i_s_deadlock_info_fill_table; + + DBUG_RETURN(0); +} + static int rdb_i_s_deinit(void *p MY_ATTRIBUTE((__unused__))) { DBUG_ENTER_FUNC(); DBUG_RETURN(0); @@ -1678,4 +1807,20 @@ struct 
st_maria_plugin 
rdb_i_s_trx_info = { nullptr, /* config options */ MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL }; + +struct st_maria_plugin rdb_i_s_deadlock_info = { + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &rdb_i_s_info, + "ROCKSDB_DEADLOCK", + "Facebook", + "RocksDB transaction information", /* NOTE(review): looks copied from ROCKSDB_TRX - confirm */ + PLUGIN_LICENSE_GPL, + rdb_i_s_deadlock_info_init, + nullptr, /* deinit (none) */ + 0x0001, /* version number (0.1) */ + nullptr, /* status variables */ + nullptr, /* system variables */ + nullptr, /* config options */ + MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL +}; } // namespace myrocks diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h index 08d35e17ba9..d6a48bf3fec 100644 --- a/storage/rocksdb/rdb_i_s.h +++ b/storage/rocksdb/rdb_i_s.h @@ -32,4 +32,5 @@ extern struct st_maria_plugin rdb_i_s_ddl; extern struct st_maria_plugin rdb_i_s_index_file_map; extern struct st_maria_plugin rdb_i_s_lock_info; extern struct st_maria_plugin rdb_i_s_trx_info; +extern struct st_maria_plugin rdb_i_s_deadlock_info; } // namespace myrocks diff --git a/storage/rocksdb/rdb_io_watchdog.cc b/storage/rocksdb/rdb_io_watchdog.cc index 039b0d7baf1..f09efefcd2a 100644 --- a/storage/rocksdb/rdb_io_watchdog.cc +++ b/storage/rocksdb/rdb_io_watchdog.cc @@ -45,7 +45,7 @@ void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) { "Shutting the service down.", m_write_timeout); - abort_with_stack_traces(); + abort(); } void Rdb_io_watchdog::io_check_callback(union sigval timer_data) { diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc index d126d156314..0c561c62ab2 100644 --- a/storage/rocksdb/rdb_perf_context.cc +++ b/storage/rocksdb/rdb_perf_context.cc @@ -47,8 +47,13 @@ std::string rdb_pc_stat_types[] = { "BLOCK_READ_TIME", "BLOCK_CHECKSUM_TIME", "BLOCK_DECOMPRESS_TIME", + "GET_READ_BYTES", + "MULTIGET_READ_BYTES", + "ITER_READ_BYTES", "INTERNAL_KEY_SKIPPED_COUNT", "INTERNAL_DELETE_SKIPPED_COUNT", + "INTERNAL_RECENT_SKIPPED_COUNT", + 
"INTERNAL_MERGE_COUNT", "GET_SNAPSHOT_TIME", 
"GET_FROM_MEMTABLE_TIME", "GET_FROM_MEMTABLE_COUNT", @@ -56,9 +61,12 @@ std::string rdb_pc_stat_types[] = { "GET_FROM_OUTPUT_FILES_TIME", "SEEK_ON_MEMTABLE_TIME", "SEEK_ON_MEMTABLE_COUNT", + "NEXT_ON_MEMTABLE_COUNT", + "PREV_ON_MEMTABLE_COUNT", "SEEK_CHILD_SEEK_TIME", "SEEK_CHILD_SEEK_COUNT", - "SEEK_IN_HEAP_TIME", + "SEEK_MIN_HEAP_TIME", + "SEEK_MAX_HEAP_TIME", "SEEK_INTERNAL_SEEK_TIME", "FIND_NEXT_USER_ENTRY_TIME", "WRITE_WAL_TIME", @@ -74,6 +82,12 @@ std::string rdb_pc_stat_types[] = { "NEW_TABLE_ITERATOR_NANOS", "BLOCK_SEEK_NANOS", "FIND_TABLE_NANOS", + "BLOOM_MEMTABLE_HIT_COUNT", + "BLOOM_MEMTABLE_MISS_COUNT", + "BLOOM_SST_HIT_COUNT", + "BLOOM_SST_MISS_COUNT", + "KEY_LOCK_WAIT_TIME", + "KEY_LOCK_WAIT_COUNT", "IO_THREAD_POOL_ID", "IO_BYTES_WRITTEN", "IO_BYTES_READ", @@ -107,8 +121,13 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) { IO_PERF_RECORD(block_read_time); IO_PERF_RECORD(block_checksum_time); IO_PERF_RECORD(block_decompress_time); + IO_PERF_RECORD(get_read_bytes); + IO_PERF_RECORD(multiget_read_bytes); + IO_PERF_RECORD(iter_read_bytes); IO_PERF_RECORD(internal_key_skipped_count); IO_PERF_RECORD(internal_delete_skipped_count); + IO_PERF_RECORD(internal_recent_skipped_count); + IO_PERF_RECORD(internal_merge_count); IO_PERF_RECORD(get_snapshot_time); IO_PERF_RECORD(get_from_memtable_time); IO_PERF_RECORD(get_from_memtable_count); @@ -116,9 +135,12 @@ static void harvest_diffs(Rdb_atomic_perf_counters *const counters) { IO_PERF_RECORD(get_from_output_files_time); IO_PERF_RECORD(seek_on_memtable_time); IO_PERF_RECORD(seek_on_memtable_count); + IO_PERF_RECORD(next_on_memtable_count); + IO_PERF_RECORD(prev_on_memtable_count); IO_PERF_RECORD(seek_child_seek_time); IO_PERF_RECORD(seek_child_seek_count); IO_PERF_RECORD(seek_min_heap_time); + IO_PERF_RECORD(seek_max_heap_time); IO_PERF_RECORD(seek_internal_seek_time); IO_PERF_RECORD(find_next_user_entry_time); IO_PERF_RECORD(write_wal_time); @@ -134,6 +156,13 @@ static void 
harvest_diffs(Rdb_atomic_perf_counters *const counters) { IO_PERF_RECORD(new_table_iterator_nanos); IO_PERF_RECORD(block_seek_nanos); IO_PERF_RECORD(find_table_nanos); + IO_PERF_RECORD(bloom_memtable_hit_count); + IO_PERF_RECORD(bloom_memtable_miss_count); + IO_PERF_RECORD(bloom_sst_hit_count); + IO_PERF_RECORD(bloom_sst_miss_count); + IO_PERF_RECORD(key_lock_wait_time); + IO_PERF_RECORD(key_lock_wait_count); + IO_STAT_RECORD(thread_pool_id); IO_STAT_RECORD(bytes_written); IO_STAT_RECORD(bytes_read); diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h index f9b9fd48d3e..2aca3dc3bfd 100644 --- a/storage/rocksdb/rdb_perf_context.h +++ b/storage/rocksdb/rdb_perf_context.h @@ -37,8 +37,13 @@ enum { PC_BLOCK_READ_TIME, PC_BLOCK_CHECKSUM_TIME, PC_BLOCK_DECOMPRESS_TIME, + PC_GET_READ_BYTES, + PC_MULTIGET_READ_BYTES, + PC_ITER_READ_BYTES, PC_KEY_SKIPPED, PC_DELETE_SKIPPED, + PC_RECENT_SKIPPED, + PC_MERGE, PC_GET_SNAPSHOT_TIME, PC_GET_FROM_MEMTABLE_TIME, PC_GET_FROM_MEMTABLE_COUNT, @@ -46,9 +51,12 @@ enum { PC_GET_FROM_OUTPUT_FILES_TIME, PC_SEEK_ON_MEMTABLE_TIME, PC_SEEK_ON_MEMTABLE_COUNT, + PC_NEXT_ON_MEMTABLE_COUNT, + PC_PREV_ON_MEMTABLE_COUNT, PC_SEEK_CHILD_SEEK_TIME, PC_SEEK_CHILD_SEEK_COUNT, PC_SEEK_MIN_HEAP_TIME, + PC_SEEK_MAX_HEAP_TIME, PC_SEEK_INTERNAL_SEEK_TIME, PC_FIND_NEXT_USER_ENTRY_TIME, PC_WRITE_WAL_TIME, @@ -64,6 +72,12 @@ enum { PC_NEW_TABLE_ITERATOR_NANOS, PC_BLOCK_SEEK_NANOS, PC_FIND_TABLE_NANOS, + PC_BLOOM_MEMTABLE_HIT_COUNT, + PC_BLOOM_MEMTABLE_MISS_COUNT, + PC_BLOOM_SST_HIT_COUNT, + PC_BLOOM_SST_MISS_COUNT, + PC_KEY_LOCK_WAIT_TIME, + PC_KEY_LOCK_WAIT_COUNT, PC_IO_THREAD_POOL_ID, PC_IO_BYTES_WRITTEN, PC_IO_BYTES_READ, diff --git a/storage/rocksdb/rdb_psi.cc b/storage/rocksdb/rdb_psi.cc index b6bc89a02f9..b5309df5973 100644 --- a/storage/rocksdb/rdb_psi.cc +++ b/storage/rocksdb/rdb_psi.cc @@ -48,7 +48,7 @@ my_core::PSI_thread_info all_rocksdb_threads[] = { my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, 
rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key, rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key, key_mutex_tx_list, rdb_sysvars_psi_mutex_key, - rdb_cfm_mutex_key; + rdb_cfm_mutex_key, rdb_sst_commit_key; my_core::PSI_mutex_info all_rocksdb_mutexes[] = { {&rdb_psi_open_tbls_mutex_key, "open tables", PSI_FLAG_GLOBAL}, @@ -60,6 +60,7 @@ my_core::PSI_mutex_info all_rocksdb_mutexes[] = { {&key_mutex_tx_list, "tx_list", PSI_FLAG_GLOBAL}, {&rdb_sysvars_psi_mutex_key, "setting sysvar", PSI_FLAG_GLOBAL}, {&rdb_cfm_mutex_key, "column family manager", PSI_FLAG_GLOBAL}, + {&rdb_sst_commit_key, "sst commit", PSI_FLAG_GLOBAL}, }; my_core::PSI_rwlock_key key_rwlock_collation_exception_list, diff --git a/storage/rocksdb/rdb_psi.h b/storage/rocksdb/rdb_psi.h index 0a62f411ade..d4318ee3dba 100644 --- a/storage/rocksdb/rdb_psi.h +++ b/storage/rocksdb/rdb_psi.h @@ -40,7 +40,8 @@ extern my_core::PSI_thread_key rdb_background_psi_thread_key, extern my_core::PSI_mutex_key rdb_psi_open_tbls_mutex_key, rdb_signal_bg_psi_mutex_key, rdb_signal_drop_idx_psi_mutex_key, rdb_collation_data_mutex_key, rdb_mem_cmp_space_mutex_key, - key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key; + key_mutex_tx_list, rdb_sysvars_psi_mutex_key, rdb_cfm_mutex_key, + rdb_sst_commit_key; extern my_core::PSI_rwlock_key key_rwlock_collation_exception_list, key_rwlock_read_free_rpl_tables, key_rwlock_skip_unique_check_tables; diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc index 72abfab5d6b..e0dfb011f87 100644 --- a/storage/rocksdb/rdb_sst_info.cc +++ b/storage/rocksdb/rdb_sst_info.cc @@ -43,6 +43,7 @@ #include "./ha_rocksdb.h" #include "./ha_rocksdb_proto.h" #include "./rdb_cf_options.h" +#include "./rdb_psi.h" namespace myrocks { @@ -262,7 +263,6 @@ rocksdb::Status Rdb_sst_file_ordered::put(const rocksdb::Slice &key, if (!m_first_key.empty()) { rocksdb::Slice first_key_slice(m_first_key); int cmp = m_file.compare(first_key_slice, key); - 
DBUG_ASSERT(cmp != 0); m_use_stack = (cmp > 0); // Apply the first key to the stack or SST @@ -326,11 +326,11 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, const rocksdb::DBOptions &db_options, const bool &tracing) : m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0), - m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), + m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), m_committed(false), #if defined(RDB_SST_INFO_USE_THREAD) m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false), #endif - m_sst_file(nullptr), m_tracing(tracing) { + m_sst_file(nullptr), m_tracing(tracing), m_print_client_error(true) { m_prefix = db->GetName() + "/"; std::string normalized_table; @@ -357,6 +357,7 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename, // Set the maximum size to 3 times the cf's target size m_max_size = cf_descr.options.target_file_size_base * 3; } + mysql_mutex_init(rdb_sst_commit_key, &m_commit_mutex, MY_MUTEX_INIT_FAST); } Rdb_sst_info::~Rdb_sst_info() { @@ -364,6 +365,7 @@ Rdb_sst_info::~Rdb_sst_info() { #if defined(RDB_SST_INFO_USE_THREAD) DBUG_ASSERT(m_thread == nullptr); #endif + mysql_mutex_destroy(&m_commit_mutex); } int Rdb_sst_info::open_new_sst_file() { @@ -428,6 +430,8 @@ void Rdb_sst_info::close_curr_sst_file() { int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) { int rc; + DBUG_ASSERT(!m_committed); + if (m_curr_size + key.size() + value.size() >= m_max_size) { // The current sst file has reached its maximum, close it out close_curr_sst_file(); @@ -461,7 +465,21 @@ int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) { return HA_EXIT_SUCCESS; } -int Rdb_sst_info::commit() { +int Rdb_sst_info::commit(bool print_client_error) { + int ret = HA_EXIT_SUCCESS; + + // Both the transaction clean up and the ha_rocksdb handler have + // references to this Rdb_sst_info and both can call commit, so + // synchronize on 
the object here. + RDB_MUTEX_LOCK_CHECK(m_commit_mutex); + + if (m_committed) { + RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex); + return ret; + } + + m_print_client_error = print_client_error; + if (m_curr_size > 0) { // Close out any existing files close_curr_sst_file(); @@ -480,16 +498,24 @@ int Rdb_sst_info::commit() { } #endif + m_committed = true; + RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex); + // Did we get any errors? if (have_background_error()) { - return get_and_reset_background_error(); + ret = get_and_reset_background_error(); } - return HA_EXIT_SUCCESS; + m_print_client_error = true; + return ret; } void Rdb_sst_info::set_error_msg(const std::string &sst_file_name, const rocksdb::Status &s) { + + if (!m_print_client_error) + return; + #if defined(RDB_SST_INFO_USE_THREAD) // Both the foreground and background threads can set the error message // so lock the mutex to protect it. We only want the first error that diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h index 1dee0fd0518..42f6458e46b 100644 --- a/storage/rocksdb/rdb_sst_info.h +++ b/storage/rocksdb/rdb_sst_info.h @@ -128,6 +128,8 @@ class Rdb_sst_info { std::string m_prefix; static std::atomic<uint64_t> m_prefix_counter; static std::string m_suffix; + bool m_committed; + mysql_mutex_t m_commit_mutex; #if defined(RDB_SST_INFO_USE_THREAD) std::queue<Rdb_sst_file_ordered *> m_queue; std::mutex m_mutex; @@ -137,6 +139,7 @@ class Rdb_sst_info { #endif Rdb_sst_file_ordered *m_sst_file; const bool m_tracing; + bool m_print_client_error; int open_new_sst_file(); void close_curr_sst_file(); @@ -157,7 +160,8 @@ class Rdb_sst_info { ~Rdb_sst_info(); int put(const rocksdb::Slice &key, const rocksdb::Slice &value); - int commit(); + int commit(bool print_client_error = true); + bool is_committed() const { return m_committed; } bool have_background_error() { return m_background_error != 0; } diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc index 335676a6ba4..723e079a165 
100644 --- a/storage/rocksdb/rdb_utils.cc +++ b/storage/rocksdb/rdb_utils.cc @@ -352,4 +352,37 @@ const char *get_rocksdb_supported_compression_types() return compression_methods_buf.c_str(); } +bool rdb_check_rocksdb_corruption() { + return !my_access(myrocks::rdb_corruption_marker_file_name().c_str(), F_OK); +} + +void rdb_persist_corruption_marker() { + const std::string &fileName(myrocks::rdb_corruption_marker_file_name()); + /* O_SYNC is not supported on windows */ + int fd = my_open(fileName.c_str(), O_CREAT | IF_WIN(0, O_SYNC), MYF(MY_WME)); + if (fd < 0) { + sql_print_error("RocksDB: Can't create file %s to mark rocksdb as " + "corrupted.", + fileName.c_str()); + } else { + sql_print_information("RocksDB: Creating the file %s to abort mysqld " + "restarts. Remove this file from the data directory " + "after fixing the corruption to recover. ", + fileName.c_str()); + } + +#ifdef _WIN32 + /* A replacement for O_SYNC flag above */ + if (fd >= 0) + my_sync(fd, MYF(0)); +#endif + + int ret = my_close(fd, MYF(MY_WME)); + if (ret) { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Error (%d) closing the file %s", ret, + fileName.c_str()); + } +} + } // namespace myrocks diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h index 3feda5d82ad..3125941ee78 100644 --- a/storage/rocksdb/rdb_utils.h +++ b/storage/rocksdb/rdb_utils.h @@ -84,7 +84,7 @@ namespace myrocks { do { \ if (!(expr)) { \ my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \ - abort_with_stack_traces(); \ + abort(); \ } \ } while (0) #endif // SHIP_ASSERT @@ -250,12 +250,20 @@ inline void rdb_check_mutex_call_result(const char *function_name, // This will hopefully result in a meaningful stack trace which we can use // to efficiently debug the root cause. 
- abort_with_stack_traces(); + abort(); } } void rdb_log_status_error(const rocksdb::Status &s, const char *msg = nullptr); +// return true if the marker file exists which indicates that the corruption +// has been detected +bool rdb_check_rocksdb_corruption(); + +// stores a marker file in the data directory so that after restart server +// is still aware that rocksdb data is corrupted +void rdb_persist_corruption_marker(); + /* Helper functions to parse strings. */ diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb -Subproject 9a970c81af9807071bd690f4c808c5045866291 +Subproject ba295cda29daee3ffe58549542804efdfd96978 |