summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Petrunia <psergey@askmonty.org>2019-06-21 12:56:26 +0300
committerSergei Petrunia <psergey@askmonty.org>2019-06-21 12:56:26 +0300
commit821b866b5502511fe6fcba9503c5eb5790c393a9 (patch)
tree0ca7a38801310ff0231efbc31340bbc66fe033cf
parentc631bd7f1099b7f60b319c3c6093869116bed2d2 (diff)
parent7345c0de26a8e9a88c88a5b5502beb3250b9429e (diff)
downloadmariadb-git-821b866b5502511fe6fcba9503c5eb5790c393a9.tar.gz
Merge branch 'bb-10.2-myrocks-merge' into 10.2
-rw-r--r--storage/rocksdb/.clang-format96
-rw-r--r--storage/rocksdb/CMakeLists.txt26
-rw-r--r--storage/rocksdb/build_rocksdb.cmake178
-rw-r--r--storage/rocksdb/event_listener.cc9
-rw-r--r--storage/rocksdb/event_listener.h6
-rw-r--r--storage/rocksdb/ha_rocksdb.cc3950
-rw-r--r--storage/rocksdb/ha_rocksdb.h651
-rw-r--r--storage/rocksdb/ha_rocksdb_proto.h4
-rw-r--r--storage/rocksdb/logger.h6
-rwxr-xr-xstorage/rocksdb/myrocks_hotbackup.py20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/combinations1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc298
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc1425
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result (renamed from storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result)8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result87
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result693
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result693
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/check_flags.result66
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result154
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/delete_before_lock.result22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result38
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result3503
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result203
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue884.result79
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue896.result17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue900.result11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result128
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result98
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result603
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result335
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result84
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rpl_read_free.result321
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result56
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/show_engine.result16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result31
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/statistics.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result24
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result620
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result45
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf (renamed from storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.cnf)0
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc (renamed from storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.test)8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc213
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/check_flags.test117
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/delete_before_lock.test36
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/disabled.def13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test97
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test104
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue884.test43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue896.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue900.test13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test62
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test96
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test88
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc106
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test52
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc30
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf (renamed from storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.cnf)6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test414
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc69
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test78
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.test302
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc102
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test83
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test56
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/unique_check.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test36
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test1
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh8
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh30
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test30
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/combinations6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc37
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result282
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result165
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result361
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result43
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/combinations2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test149
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test51
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result28
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result58
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result50
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result97
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test24
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test75
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test11
-rw-r--r--storage/rocksdb/nosql_access.cc52
-rw-r--r--storage/rocksdb/nosql_access.h36
-rw-r--r--storage/rocksdb/properties_collector.cc96
-rw-r--r--storage/rocksdb/properties_collector.h53
-rw-r--r--storage/rocksdb/rdb_buff.h138
-rw-r--r--storage/rocksdb/rdb_cf_manager.cc112
-rw-r--r--storage/rocksdb/rdb_cf_manager.h12
-rw-r--r--storage/rocksdb/rdb_cf_options.cc70
-rw-r--r--storage/rocksdb/rdb_cf_options.h18
-rw-r--r--storage/rocksdb/rdb_compact_filter.h32
-rw-r--r--storage/rocksdb/rdb_comparator.h6
-rw-r--r--storage/rocksdb/rdb_converter.cc838
-rw-r--r--storage/rocksdb/rdb_converter.h247
-rw-r--r--storage/rocksdb/rdb_datadic.cc1900
-rw-r--r--storage/rocksdb/rdb_datadic.h518
-rw-r--r--storage/rocksdb/rdb_global.h392
-rw-r--r--storage/rocksdb/rdb_i_s.cc117
-rw-r--r--storage/rocksdb/rdb_i_s.h2
-rw-r--r--storage/rocksdb/rdb_index_merge.cc20
-rw-r--r--storage/rocksdb/rdb_index_merge.h15
-rw-r--r--storage/rocksdb/rdb_io_watchdog.cc11
-rw-r--r--storage/rocksdb/rdb_io_watchdog.h14
-rw-r--r--storage/rocksdb/rdb_mariadb_server_port.cc4
-rw-r--r--storage/rocksdb/rdb_mutex_wrapper.cc19
-rw-r--r--storage/rocksdb/rdb_mutex_wrapper.h40
-rw-r--r--storage/rocksdb/rdb_perf_context.cc24
-rw-r--r--storage/rocksdb/rdb_perf_context.h13
-rw-r--r--storage/rocksdb/rdb_psi.cc16
-rw-r--r--storage/rocksdb/rdb_psi.h3
-rw-r--r--storage/rocksdb/rdb_sst_info.cc233
-rw-r--r--storage/rocksdb/rdb_sst_info.h119
-rw-r--r--storage/rocksdb/rdb_threads.cc10
-rw-r--r--storage/rocksdb/rdb_threads.h23
-rw-r--r--storage/rocksdb/rdb_utils.cc26
-rw-r--r--storage/rocksdb/rdb_utils.h61
m---------storage/rocksdb/rocksdb0
230 files changed, 20003 insertions, 5241 deletions
diff --git a/storage/rocksdb/.clang-format b/storage/rocksdb/.clang-format
index d80b012dd4b..b1df76bdf2d 100644
--- a/storage/rocksdb/.clang-format
+++ b/storage/rocksdb/.clang-format
@@ -1,23 +1,49 @@
----
+# Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2.0,
+# as published by the Free Software Foundation.
+#
+# This program is also distributed with certain software (including
+# but not limited to OpenSSL) that is licensed under separate terms,
+# as designated in a particular file or component or in included license
+# documentation. The authors of MySQL hereby grant you an additional
+# permission to link the program and your derivative works with the
+# separately licensed software that they have included with MySQL.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License, version 2.0, for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# This is the output of clang-format-5.0 --style=google --dump-config,
+# except for changes mentioned below. We lock the style so that any newer
+# version of clang-format will give the same result; as time goes, we may
+# update this list, requiring newer versions of clang-format.
+
Language: Cpp
-# BasedOnStyle: LLVM
-AccessModifierOffset: -2
+# BasedOnStyle: Google
+AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
-AlignEscapedNewlinesLeft: false
+AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: false
-AllowShortLoopsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: false
-AlwaysBreakTemplateDeclarations: false
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
@@ -32,62 +58,80 @@ BraceWrapping:
BeforeCatch: false
BeforeElse: false
IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
-DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
-ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
IncludeCategories:
- - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ - Regex: '^<.*\.h>'
+ Priority: 1
+ - Regex: '^<.*'
Priority: 2
- - Regex: '^(<|"(gtest|isl|json)/)'
- Priority: 3
- Regex: '.*'
- Priority: 1
-IncludeIsMainRegex: '$'
-IndentCaseLabels: false
+ Priority: 3
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
-KeepEmptyLinesAtTheStartOfBlocks: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PenaltyBreakBeforeFirstCallParameter: 19
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 60
-PointerAlignment: Right
+PenaltyReturnTypeOnItsOwnLine: 200
ReflowComments: true
SortIncludes: true
+SortUsingDeclarations: true
SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 1
+SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
-Standard: Cpp11
TabWidth: 8
UseTab: Never
-JavaScriptQuotes: Leave
-...
+
+# We declare one specific pointer style since right alignment is dominant in
+# the MySQL code base (default --style=google has DerivePointerAlignment true).
+DerivePointerAlignment: false
+PointerAlignment: Right
+
+# MySQL source code is allowed to use C++11 features.
+Standard: Cpp11
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
index 70010ee57e3..fa564a603e6 100644
--- a/storage/rocksdb/CMakeLists.txt
+++ b/storage/rocksdb/CMakeLists.txt
@@ -100,6 +100,8 @@ SET(ROCKSDB_SE_SOURCES
rdb_psi.cc
rdb_sst_info.cc
rdb_sst_info.h
+ rdb_converter.cc
+ rdb_converter.h
)
# MariaDB: the following is added in build_rocksdb.cmake, when appropriate:
@@ -149,6 +151,7 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib
rdb_perf_context.h
rdb_buff.h
rdb_mariadb_port.h
+ nosql_access.cc nosql_access.h
)
ADD_DEPENDENCIES(rocksdb_aux_lib GenError)
@@ -159,6 +162,27 @@ if (UNIX AND NOT APPLE)
TARGET_LINK_LIBRARIES(rocksdb_aux_lib -lrt)
endif()
+# IF (WITH_JEMALLOC)
+# FIND_LIBRARY(JEMALLOC_LIBRARY
+# NAMES libjemalloc${PIC_EXT}.a jemalloc
+# HINTS ${WITH_JEMALLOC}/lib)
+# SET(rocksdb_static_libs ${rocksdb_static_libs}
+# ${JEMALLOC_LIBRARY})
+# ADD_DEFINITIONS(-DROCKSDB_JEMALLOC)
+# ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
+# ENDIF()
+
+# MariaDB: Q: why does the upstream add libunwind for a particular
+# storage engine?
+#IF (WITH_UNWIND)
+# FIND_LIBRARY(UNWIND_LIBRARY
+# NAMES libunwind${PIC_EXT}.a unwind
+# HINTS ${WITH_UNWIND}/lib)
+# SET(rocksdb_static_libs ${rocksdb_static_libs}
+# ${UNWIND_LIBRARY})
+#ENDIF()
+
+
TARGET_LINK_LIBRARIES(rocksdb rocksdb_aux_lib)
FIND_LIBRARY(LZ4_LIBRARY
NAMES liblz4${PIC_EXT}.a lz4
@@ -187,6 +211,8 @@ ENDIF()
CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
IF(HAVE_SCHED_GETCPU)
ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1)
+# MariaDB: don't do this:
+# ADD_DEFINITIONS(-DZSTD_STATIC_LINKING_ONLY)
ENDIF()
IF (WITH_TBB)
diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake
index eb7a7fed6c1..49d249c0eb3 100644
--- a/storage/rocksdb/build_rocksdb.cmake
+++ b/storage/rocksdb/build_rocksdb.cmake
@@ -21,11 +21,13 @@ else()
if(WITH_ROCKSDB_JEMALLOC)
find_package(JeMalloc REQUIRED)
add_definitions(-DROCKSDB_JEMALLOC)
+ ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
include_directories(${JEMALLOC_INCLUDE_DIR})
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
# FreeBSD has jemaloc as default malloc
add_definitions(-DROCKSDB_JEMALLOC)
+ ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
set(WITH_JEMALLOC ON)
endif()
endif()
@@ -160,7 +162,7 @@ find_package(Threads REQUIRED)
if(WIN32)
set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib)
else()
- set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} ${LIBRT})
+ set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} ${LIBRT} ${LIBDL})
endif()
set(ROCKSDB_LIBS rocksdblib})
@@ -178,24 +180,27 @@ set(ROCKSDB_SOURCES
db/c.cc
db/column_family.cc
db/compacted_db_impl.cc
- db/compaction.cc
- db/compaction_iterator.cc
- db/compaction_job.cc
- db/compaction_picker.cc
- db/compaction_picker_universal.cc
+ db/compaction/compaction.cc
+ db/compaction/compaction_iterator.cc
+ db/compaction/compaction_job.cc
+ db/compaction/compaction_picker.cc
+ db/compaction/compaction_picker_fifo.cc
+ db/compaction/compaction_picker_level.cc
+ db/compaction/compaction_picker_universal.cc
db/convenience.cc
db/db_filesnapshot.cc
- db/db_impl.cc
- db/db_impl_compaction_flush.cc
- db/db_impl_debug.cc
- db/db_impl_experimental.cc
- db/db_impl_files.cc
- db/db_impl_open.cc
- db/db_impl_readonly.cc
- db/db_impl_write.cc
+ db/dbformat.cc
+ db/db_impl/db_impl.cc
+ db/db_impl/db_impl_compaction_flush.cc
+ db/db_impl/db_impl_debug.cc
+ db/db_impl/db_impl_experimental.cc
+ db/db_impl/db_impl_files.cc
+ db/db_impl/db_impl_open.cc
+ db/db_impl/db_impl_readonly.cc
+ db/db_impl/db_impl_secondary.cc
+ db/db_impl/db_impl_write.cc
db/db_info_dumper.cc
db/db_iter.cc
- db/dbformat.cc
db/error_handler.cc
db/event_helpers.cc
db/experimental.cc
@@ -204,17 +209,18 @@ set(ROCKSDB_SOURCES
db/flush_job.cc
db/flush_scheduler.cc
db/forward_iterator.cc
+ db/in_memory_stats_history.cc
db/internal_stats.cc
db/log_reader.cc
- db/log_writer.cc
db/logs_with_prep_tracker.cc
+ db/log_writer.cc
db/malloc_stats.cc
- db/managed_iterator.cc
db/memtable.cc
db/memtable_list.cc
db/merge_helper.cc
db/merge_operator.cc
db/range_del_aggregator.cc
+ db/range_tombstone_fragmenter.cc
db/repair.cc
db/snapshot_impl.cc
db/table_cache.cc
@@ -224,24 +230,29 @@ set(ROCKSDB_SOURCES
db/version_edit.cc
db/version_set.cc
db/wal_manager.cc
- db/write_batch.cc
db/write_batch_base.cc
+ db/write_batch.cc
db/write_controller.cc
db/write_thread.cc
env/env.cc
env/env_chroot.cc
env/env_hdfs.cc
env/mock_env.cc
+ file/delete_scheduler.cc
+ file/filename.cc
+ file/file_util.cc
+ file/sst_file_manager_impl.cc
+ logging/auto_roll_logger.cc
+ logging/event_logger.cc
+ logging/log_buffer.cc
+ memory/arena.cc
+ memory/concurrent_arena.cc
+ memory/jemalloc_nodump_allocator.cc
memtable/alloc_tracker.cc
- memtable/hash_cuckoo_rep.cc
- memtable/hash_cuckoo_rep.cc
memtable/hash_linklist_rep.cc
- memtable/hash_linklist_rep.cc
- memtable/hash_skiplist_rep.cc
memtable/hash_skiplist_rep.cc
+ memtable/memtablerep_bench.cc
memtable/skiplistrep.cc
- memtable/skiplistrep.cc
- memtable/vectorrep.cc
memtable/vectorrep.cc
memtable/write_buffer_manager.cc
monitoring/histogram.cc
@@ -253,6 +264,7 @@ set(ROCKSDB_SOURCES
monitoring/statistics.cc
monitoring/thread_status_impl.cc
monitoring/thread_status_updater.cc
+ monitoring/thread_status_updater_debug.cc
monitoring/thread_status_util.cc
monitoring/thread_status_util_debug.cc
options/cf_options.cc
@@ -262,108 +274,101 @@ set(ROCKSDB_SOURCES
options/options_parser.cc
options/options_sanity_check.cc
port/stack_trace.cc
- table/adaptive_table_factory.cc
- table/block.cc
- table/block_based_filter_block.cc
- table/block_based_table_builder.cc
- table/block_based_table_factory.cc
- table/block_based_table_reader.cc
- table/block_builder.cc
+ table/adaptive/adaptive_table_factory.cc
+ table/block_based/block_based_filter_block.cc
+ table/block_based/block_based_table_builder.cc
+ table/block_based/block_based_table_factory.cc
+ table/block_based/block_based_table_reader.cc
+ table/block_based/block_builder.cc
+ table/block_based/block.cc
+ table/block_based/block_prefix_index.cc
+ table/block_based/data_block_footer.cc
+ table/block_based/data_block_hash_index.cc
+ table/block_based/flush_block_policy.cc
+ table/block_based/full_filter_block.cc
+ table/block_based/index_builder.cc
+ table/block_based/partitioned_filter_block.cc
table/block_fetcher.cc
- table/block_prefix_index.cc
table/bloom_block.cc
- table/cuckoo_table_builder.cc
- table/cuckoo_table_factory.cc
- table/cuckoo_table_reader.cc
- table/flush_block_policy.cc
+ table/cuckoo/cuckoo_table_builder.cc
+ table/cuckoo/cuckoo_table_factory.cc
+ table/cuckoo/cuckoo_table_reader.cc
table/format.cc
- table/full_filter_block.cc
table/get_context.cc
- table/index_builder.cc
table/iterator.cc
table/merging_iterator.cc
table/meta_blocks.cc
- table/partitioned_filter_block.cc
+ table/mock_table.cc
table/persistent_cache_helper.cc
- table/plain_table_builder.cc
- table/plain_table_factory.cc
- table/plain_table_index.cc
- table/plain_table_key_coding.cc
- table/plain_table_reader.cc
+ table/plain/plain_table_builder.cc
+ table/plain/plain_table_factory.cc
+ table/plain/plain_table_index.cc
+ table/plain/plain_table_key_coding.cc
+ table/plain/plain_table_reader.cc
+ table/sst_file_reader.cc
table/sst_file_writer.cc
table/table_properties.cc
+ table/table_reader_bench.cc
table/two_level_iterator.cc
- tools/db_bench_tool.cc
- tools/dump/db_dump_tool.cc
+ test_util/sync_point.cc
+ test_util/sync_point_impl.cc
tools/ldb_cmd.cc
tools/ldb_tool.cc
tools/sst_dump_tool.cc
- util/arena.cc
- util/auto_roll_logger.cc
+ trace_replay/trace_replay.cc
util/bloom.cc
util/coding.cc
util/compaction_job_stats_impl.cc
util/comparator.cc
util/compression_context_cache.cc
- util/concurrent_arena.cc
+ util/concurrent_task_limiter_impl.cc
+ util/crc32c_arm64.cc
util/crc32c.cc
- util/delete_scheduler.cc
util/dynamic_bloom.cc
- util/event_logger.cc
util/file_reader_writer.cc
- util/file_util.cc
- util/filename.cc
util/filter_policy.cc
util/hash.cc
- util/log_buffer.cc
- util/murmurhash.cc
- util/random.cc
- util/rate_limiter.cc
- util/slice.cc
- util/sst_file_manager_impl.cc
- util/status.cc
- util/status_message.cc
- util/string_util.cc
- util/sync_point.cc
- util/sync_point_impl.cc
- util/testutil.cc
- util/thread_local.cc
- util/threadpool_imp.cc
- util/transaction_test_util.cc
- util/xxhash.cc
utilities/backupable/backupable_db.cc
+ utilities/blob_db/blob_compaction_filter.cc
utilities/blob_db/blob_db.cc
+ utilities/blob_db/blob_db_impl.cc
+ utilities/blob_db/blob_db_impl_filesnapshot.cc
+ utilities/blob_db/blob_dump_tool.cc
+ utilities/blob_db/blob_file.cc
+ utilities/blob_db/blob_log_format.cc
+ utilities/blob_db/blob_log_reader.cc
+ utilities/blob_db/blob_log_writer.cc
+ utilities/cassandra/cassandra_compaction_filter.cc
+ utilities/cassandra/format.cc
+ utilities/cassandra/merge_operator.cc
+ utilities/cassandra/test_utils.cc
utilities/checkpoint/checkpoint_impl.cc
- utilities/col_buf_decoder.cc
- utilities/col_buf_encoder.cc
- utilities/column_aware_encoding_util.cc
utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
- utilities/date_tiered/date_tiered_db_impl.cc
+ utilities/convenience/info_log_finder.cc
utilities/debug.cc
- utilities/document/document_db.cc
- utilities/document/json_document.cc
- utilities/document/json_document_builder.cc
utilities/env_mirror.cc
- utilities/geodb/geodb_impl.cc
+ utilities/env_timed.cc
utilities/leveldb_options/leveldb_options.cc
- utilities/lua/rocks_lua_compaction_filter.cc
utilities/memory/memory_util.cc
+ utilities/merge_operators/bytesxor.cc
utilities/merge_operators/max.cc
utilities/merge_operators/put.cc
- utilities/merge_operators/string_append/stringappend.cc
utilities/merge_operators/string_append/stringappend2.cc
+ utilities/merge_operators/string_append/stringappend.cc
utilities/merge_operators/uint64add.cc
utilities/option_change_migration/option_change_migration.cc
utilities/options/options_util.cc
utilities/persistent_cache/block_cache_tier.cc
utilities/persistent_cache/block_cache_tier_file.cc
utilities/persistent_cache/block_cache_tier_metadata.cc
+ utilities/persistent_cache/hash_table_bench.cc
+ utilities/persistent_cache/persistent_cache_bench.cc
utilities/persistent_cache/persistent_cache_tier.cc
utilities/persistent_cache/volatile_tier_impl.cc
- utilities/redis/redis_lists.cc
utilities/simulator_cache/sim_cache.cc
- utilities/spatialdb/spatial_db.cc
utilities/table_properties_collectors/compact_on_deletion_collector.cc
+ utilities/trace/file_trace_reader_writer.cc
+ utilities/transactions/optimistic_transaction.cc
utilities/transactions/optimistic_transaction_db_impl.cc
utilities/transactions/pessimistic_transaction.cc
utilities/transactions/pessimistic_transaction_db.cc
@@ -379,8 +384,19 @@ set(ROCKSDB_SOURCES
utilities/ttl/db_ttl_impl.cc
utilities/write_batch_with_index/write_batch_with_index.cc
utilities/write_batch_with_index/write_batch_with_index_internal.cc
+ util/log_write_bench.cc
+ util/murmurhash.cc
+ util/random.cc
+ util/rate_limiter.cc
+ util/slice.cc
+ util/status.cc
+ util/string_util.cc
+ util/thread_local.cc
+ util/threadpool_imp.cc
+ util/xxhash.cc
)
+
if(WIN32)
list(APPEND ROCKSDB_SOURCES
port/win/io_win.cc
diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc
index 4313a718eda..7c4eed8fd7e 100644
--- a/storage/rocksdb/event_listener.cc
+++ b/storage/rocksdb/event_listener.cc
@@ -34,9 +34,9 @@
namespace myrocks {
-static std::vector<Rdb_index_stats>
-extract_index_stats(const std::vector<std::string> &files,
- const rocksdb::TablePropertiesCollection &props) {
+static std::vector<Rdb_index_stats> extract_index_stats(
+ const std::vector<std::string> &files,
+ const rocksdb::TablePropertiesCollection &props) {
std::vector<Rdb_index_stats> ret;
for (auto fn : files) {
const auto it = props.find(fn);
@@ -87,10 +87,11 @@ void Rdb_event_listener::OnExternalFileIngested(
void Rdb_event_listener::OnBackgroundError(
rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) {
rdb_log_status_error(*status, "Error detected in background");
+ // NO_LINT_DEBUG
sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason);
if (status->IsCorruption()) {
rdb_persist_corruption_marker();
abort();
}
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h
index f666c876eac..737973eb9da 100644
--- a/storage/rocksdb/event_listener.h
+++ b/storage/rocksdb/event_listener.h
@@ -22,7 +22,7 @@ namespace myrocks {
class Rdb_ddl_manager;
class Rdb_event_listener : public rocksdb::EventListener {
-public:
+ public:
Rdb_event_listener(const Rdb_event_listener &) = delete;
Rdb_event_listener &operator=(const Rdb_event_listener &) = delete;
@@ -40,10 +40,10 @@ public:
void OnBackgroundError(rocksdb::BackgroundErrorReason reason,
rocksdb::Status *status) override;
-private:
+ private:
Rdb_ddl_manager *m_ddl_manager;
void update_index_stats(const rocksdb::TableProperties &props);
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index cb289fb0077..41a83c816d3 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -15,7 +15,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
+#pragma implementation // gcc: Class implementation
#endif
#define MYSQL_SERVER 1
@@ -31,8 +31,8 @@
#include "./ha_rocksdb.h"
/* C++ standard header files */
-#include <algorithm>
#include <inttypes.h>
+#include <algorithm>
#include <limits>
#include <map>
#include <queue>
@@ -41,6 +41,9 @@
#include <vector>
/* MySQL includes */
+#include <mysql/psi/mysql_table.h>
+// #include <thread_pool_priv.h>
+#include <mysys_err.h>
#include "./debug_sync.h"
#include "./my_bit.h"
#include "./my_stacktrace.h"
@@ -48,20 +51,14 @@
#include "./sql_audit.h"
#include "./sql_table.h"
#include "./sql_hset.h"
-#include <mysql/psi/mysql_table.h>
#ifdef MARIAROCKS_NOT_YET
-#include <mysql/thread_pool_priv.h>
#endif
-#include <mysys_err.h>
-
-// Both MySQL and RocksDB define the same constant. To avoid compilation errors
-// till we make the fix in RocksDB, we'll temporary undefine it here.
-#undef CACHE_LINE_SIZE
/* RocksDB includes */
#include "monitoring/histogram.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/env.h"
+#include "rocksdb/memory_allocator.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/rate_limiter.h"
#include "rocksdb/slice_transform.h"
@@ -70,6 +67,7 @@
#include "rocksdb/utilities/convenience.h"
#include "rocksdb/utilities/memory_util.h"
#include "rocksdb/utilities/sim_cache.h"
+#include "rocksdb/utilities/write_batch_with_index.h"
#include "util/stop_watch.h"
#include "./rdb_source_revision.h"
@@ -77,8 +75,10 @@
#include "./event_listener.h"
#include "./ha_rocksdb_proto.h"
#include "./logger.h"
+#include "./nosql_access.h"
#include "./rdb_cf_manager.h"
#include "./rdb_cf_options.h"
+#include "./rdb_converter.h"
#include "./rdb_datadic.h"
#include "./rdb_i_s.h"
#include "./rdb_index_merge.h"
@@ -100,18 +100,19 @@ void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
* Get the user thread's binary logging format
* @param thd user thread
* @return Value to be used as index into the binlog_format_names array
-*/
+ */
int thd_binlog_format(const MYSQL_THD thd);
/**
* Check if binary logging is filtered for thread's current db.
* @param thd Thread handle
* @retval 1 the query is not filtered, 0 otherwise.
-*/
+ */
bool thd_binlog_filter_ok(const MYSQL_THD thd);
}
MYSQL_PLUGIN_IMPORT bool my_disable_leak_check;
+extern my_bool opt_core_file;
// Needed in rocksdb_init_func
void ignore_db_dirs_append(const char *dirname_arg);
@@ -128,22 +129,14 @@ const std::string DEFAULT_CF_NAME("default");
const std::string DEFAULT_SYSTEM_CF_NAME("__system__");
const std::string PER_INDEX_CF_NAME("$per_index_cf");
-class Rdb_explicit_snapshot;
-
-std::mutex explicit_snapshot_mutex;
-ulonglong explicit_snapshot_counter = 0;
-std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>>
- explicit_snapshots;
static std::vector<GL_INDEX_ID> rdb_indexes_to_recalc;
#ifdef MARIADB_NOT_YET
class Rdb_explicit_snapshot : public explicit_snapshot {
- std::unique_ptr<rocksdb::ManagedSnapshot> snapshot;
-
public:
- static std::shared_ptr<Rdb_explicit_snapshot>
- create(snapshot_info_st *ss_info, rocksdb::DB *db,
- const rocksdb::Snapshot *snapshot) {
+ static std::shared_ptr<Rdb_explicit_snapshot> create(
+ snapshot_info_st *ss_info, rocksdb::DB *db,
+ const rocksdb::Snapshot *snapshot) {
std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
auto s = std::unique_ptr<rocksdb::ManagedSnapshot>(
new rocksdb::ManagedSnapshot(db, snapshot));
@@ -159,8 +152,24 @@ class Rdb_explicit_snapshot : public explicit_snapshot {
return ret;
}
- static std::shared_ptr<Rdb_explicit_snapshot>
- get(const ulonglong snapshot_id) {
+ static std::string dump_snapshots() {
+ std::string str;
+ std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
+ for (const auto &elem : explicit_snapshots) {
+ const auto &ss = elem.second.lock();
+ DBUG_ASSERT(ss != nullptr);
+ const auto &info = ss->ss_info;
+ str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) +
+ "\nBinlog File: " + info.binlog_file +
+ "\nBinlog Pos: " + std::to_string(info.binlog_pos) +
+ "\nGtid Executed: " + info.gtid_executed + "\n";
+ }
+
+ return str;
+ }
+
+ static std::shared_ptr<Rdb_explicit_snapshot> get(
+ const ulonglong snapshot_id) {
std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
auto elem = explicit_snapshots.find(snapshot_id);
if (elem == explicit_snapshots.end()) {
@@ -172,14 +181,27 @@ class Rdb_explicit_snapshot : public explicit_snapshot {
rocksdb::ManagedSnapshot *get_snapshot() { return snapshot.get(); }
Rdb_explicit_snapshot(snapshot_info_st ss_info,
- std::unique_ptr<rocksdb::ManagedSnapshot> snapshot)
+ std::unique_ptr<rocksdb::ManagedSnapshot> &&snapshot)
: explicit_snapshot(ss_info), snapshot(std::move(snapshot)) {}
virtual ~Rdb_explicit_snapshot() {
std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
explicit_snapshots.erase(ss_info.snapshot_id);
}
+
+ private:
+ std::unique_ptr<rocksdb::ManagedSnapshot> snapshot;
+
+ static std::mutex explicit_snapshot_mutex;
+ static ulonglong explicit_snapshot_counter;
+ static std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>>
+ explicit_snapshots;
};
+
+std::mutex Rdb_explicit_snapshot::explicit_snapshot_mutex;
+ulonglong Rdb_explicit_snapshot::explicit_snapshot_counter = 0;
+std::unordered_map<ulonglong, std::weak_ptr<Rdb_explicit_snapshot>>
+ Rdb_explicit_snapshot::explicit_snapshots;
#endif
/**
@@ -188,10 +210,11 @@ class Rdb_explicit_snapshot : public explicit_snapshot {
void ha_rocksdb::update_row_stats(const operation_type &type) {
DBUG_ASSERT(type < ROWS_MAX);
// Find if we are modifying system databases.
- if (table->s && m_tbl_def->m_is_mysql_system_table)
+ if (table->s && m_tbl_def->m_is_mysql_system_table) {
global_stats.system_rows[type].inc();
- else
+ } else {
global_stats.rows[type].inc();
+ }
}
void dbug_dump_database(rocksdb::DB *db);
@@ -199,8 +222,8 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton,
my_core::TABLE_SHARE *table_arg,
my_core::MEM_ROOT *mem_root);
-static rocksdb::CompactRangeOptions
-getCompactRangeOptions(int concurrency = 0) {
+static rocksdb::CompactRangeOptions getCompactRangeOptions(
+ int concurrency = 0) {
rocksdb::CompactRangeOptions compact_range_options;
compact_range_options.bottommost_level_compaction =
rocksdb::BottommostLevelCompaction::kForce;
@@ -261,37 +284,77 @@ static void rocksdb_flush_all_memtables() {
}
}
+static void rocksdb_delete_column_family_stub(
+ THD *const /* thd */, struct st_mysql_sys_var *const /* var */,
+ void *const /* var_ptr */, const void *const /* save */) {}
+
+static int rocksdb_delete_column_family(
+ THD *const /* thd */, struct st_mysql_sys_var *const /* var */,
+ void *const /* var_ptr */, struct st_mysql_value *const value) {
+ // Return failure for now until the race condition between creating
+ // CF and deleting CF is resolved
+ return HA_EXIT_FAILURE;
+
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int len = sizeof(buff);
+
+ DBUG_ASSERT(value != nullptr);
+
+ if (const char *const cf = value->val_str(value, buff, &len)) {
+ auto &cf_manager = rdb_get_cf_manager();
+ auto ret = cf_manager.drop_cf(cf);
+ if (ret == HA_EXIT_SUCCESS) {
+ // NO_LINT_DEBUG
+ sql_print_information("RocksDB: Dropped column family: %s\n", cf);
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Failed to drop column family: %s, error: %d\n",
+ cf, ret);
+ }
+
+ return ret;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
///////////////////////////////////////////////////////////
// Hash map: table name => open table handler
///////////////////////////////////////////////////////////
-namespace // anonymous namespace = not visible outside this source file
+namespace // anonymous namespace = not visible outside this source file
{
const ulong TABLE_HASH_SIZE = 32;
typedef Hash_set<Rdb_table_handler> Rdb_table_set;
-struct Rdb_open_tables_map {
+class Rdb_open_tables_map {
+ private:
/* Hash table used to track the handlers of open tables */
- Rdb_table_set m_hash;
+ std::unordered_map<std::string, Rdb_table_handler *> m_table_map;
+
/* The mutex used to protect the hash table */
mutable mysql_mutex_t m_mutex;
- static uchar *get_hash_key(const Rdb_table_handler *const table_handler,
- size_t *const length,
- my_bool not_used MY_ATTRIBUTE((__unused__)));
+ public:
+ void init() {
+ m_table_map.clear();
+ mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST);
+ }
+
+ void free() {
+ m_table_map.clear();
+ mysql_mutex_destroy(&m_mutex);
+ }
+ size_t count() { return m_table_map.size(); }
Rdb_table_handler *get_table_handler(const char *const table_name);
void release_table_handler(Rdb_table_handler *const table_handler);
- Rdb_open_tables_map() : m_hash(get_hash_key, system_charset_info) { }
-
- void free_hash(void) { m_hash.~Rdb_table_set(); }
-
std::vector<std::string> get_table_names(void) const;
};
-} // anonymous namespace
+} // anonymous namespace
static Rdb_open_tables_map rdb_open_tables;
@@ -326,6 +389,7 @@ static int rocksdb_create_checkpoint(
status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str());
delete checkpoint;
if (status.ok()) {
+ // NO_LINT_DEBUG
sql_print_information(
"RocksDB: created checkpoint in directory : %s\n",
checkpoint_dir.c_str());
@@ -355,6 +419,7 @@ static void rocksdb_force_flush_memtable_now_stub(
static int rocksdb_force_flush_memtable_now(
THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
struct st_mysql_value *const value) {
+ // NO_LINT_DEBUG
sql_print_information("RocksDB: Manual memtable flush.");
rocksdb_flush_all_memtables();
return HA_EXIT_SUCCESS;
@@ -367,6 +432,7 @@ static void rocksdb_force_flush_memtable_and_lzero_now_stub(
static int rocksdb_force_flush_memtable_and_lzero_now(
THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
struct st_mysql_value *const value) {
+ // NO_LINT_DEBUG
sql_print_information("RocksDB: Manual memtable and L0 flush.");
rocksdb_flush_all_memtables();
@@ -375,29 +441,46 @@ static int rocksdb_force_flush_memtable_and_lzero_now(
rocksdb::ColumnFamilyMetaData metadata;
rocksdb::ColumnFamilyDescriptor cf_descr;
+ int i, max_attempts = 3, num_errors = 0;
+
for (const auto &cf_handle : cf_manager.get_all_cf()) {
- rdb->GetColumnFamilyMetaData(cf_handle, &metadata);
- cf_handle->GetDescriptor(&cf_descr);
- c_options.output_file_size_limit = cf_descr.options.target_file_size_base;
+ for (i = 0; i < max_attempts; i++) {
+ rdb->GetColumnFamilyMetaData(cf_handle, &metadata);
+ cf_handle->GetDescriptor(&cf_descr);
+ c_options.output_file_size_limit = cf_descr.options.target_file_size_base;
+
+ DBUG_ASSERT(metadata.levels[0].level == 0);
+ std::vector<std::string> file_names;
+ for (auto &file : metadata.levels[0].files) {
+ file_names.emplace_back(file.db_path + file.name);
+ }
- DBUG_ASSERT(metadata.levels[0].level == 0);
- std::vector<std::string> file_names;
- for (auto &file : metadata.levels[0].files) {
- file_names.emplace_back(file.db_path + file.name);
- }
+ if (file_names.empty()) {
+ break;
+ }
- if (!file_names.empty()) {
rocksdb::Status s;
s = rdb->CompactFiles(c_options, cf_handle, file_names, 1);
+ // Due to a race, it's possible for CompactFiles to collide
+ // with auto compaction, causing an error to return
+ // regarding file not found. In that case, retry.
+ if (s.IsInvalidArgument()) {
+ continue;
+ }
+
if (!s.ok() && !s.IsAborted()) {
rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
return HA_EXIT_FAILURE;
}
+ break;
+ }
+ if (i == max_attempts) {
+ num_errors++;
}
}
- return HA_EXIT_SUCCESS;
+ return num_errors == 0 ? HA_EXIT_SUCCESS : HA_EXIT_FAILURE;
}
static void rocksdb_drop_index_wakeup_thread(
@@ -468,11 +551,9 @@ static void rocksdb_set_update_cf_options(THD *thd,
struct st_mysql_sys_var *var,
void *var_ptr, const void *save);
-static int rocksdb_check_bulk_load(THD *const thd,
- struct st_mysql_sys_var *var
- MY_ATTRIBUTE((__unused__)),
- void *save,
- struct st_mysql_value *value);
+static int rocksdb_check_bulk_load(
+ THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *save, struct st_mysql_value *value);
static int rocksdb_check_bulk_load_allow_unsorted(
THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
@@ -499,6 +580,8 @@ static int rocksdb_validate_set_block_cache_size(
static long long rocksdb_block_cache_size;
static long long rocksdb_sim_cache_size;
static my_bool rocksdb_use_clock_cache;
+static double rocksdb_cache_high_pri_pool_ratio;
+static my_bool rocksdb_cache_dump;
/* Use unsigned long long instead of uint64_t because of MySQL compatibility */
static unsigned long long // NOLINT(runtime/int)
rocksdb_rate_limiter_bytes_per_sec;
@@ -518,8 +601,10 @@ static my_bool rocksdb_force_compute_memtable_stats;
static uint32_t rocksdb_force_compute_memtable_stats_cachetime;
static my_bool rocksdb_debug_optimizer_no_zero_cardinality;
static uint32_t rocksdb_wal_recovery_mode;
+static uint32_t rocksdb_stats_level;
static uint32_t rocksdb_access_hint_on_compaction_start;
static char *rocksdb_compact_cf_name;
+static char *rocksdb_delete_cf_name;
static char *rocksdb_checkpoint_name;
static my_bool rocksdb_signal_drop_index_thread;
static my_bool rocksdb_signal_remove_mariabackup_checkpoint;
@@ -555,10 +640,25 @@ char *compression_types_val=
const_cast<char*>(get_rocksdb_supported_compression_types());
static unsigned long rocksdb_write_policy =
rocksdb::TxnDBWritePolicy::WRITE_COMMITTED;
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+char *rocksdb_read_free_rpl_tables;
+std::mutex rocksdb_read_free_rpl_tables_mutex;
+#if defined(HAVE_PSI_INTERFACE)
+Regex_list_handler rdb_read_free_regex_handler(key_rwlock_read_free_rpl_tables);
+#else
+Regex_list_handler rdb_read_free_regex_handler;
+#endif
+enum read_free_rpl_type { OFF = 0, PK_ONLY, PK_SK };
+static unsigned long rocksdb_read_free_rpl = read_free_rpl_type::OFF;
+#endif
+
static my_bool rocksdb_error_on_suboptimal_collation = 1;
static uint32_t rocksdb_stats_recalc_rate = 0;
static uint32_t rocksdb_debug_manual_compaction_delay = 0;
static uint32_t rocksdb_max_manual_compactions = 0;
+static my_bool rocksdb_rollback_on_timeout = FALSE;
+static my_bool rocksdb_enable_insert_with_update_caching = TRUE;
std::atomic<uint64_t> rocksdb_row_lock_deadlocks(0);
std::atomic<uint64_t> rocksdb_row_lock_wait_timeouts(0);
@@ -566,6 +666,9 @@ std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0);
std::atomic<uint64_t> rocksdb_wal_group_syncs(0);
std::atomic<uint64_t> rocksdb_manual_compactions_processed(0);
std::atomic<uint64_t> rocksdb_manual_compactions_running(0);
+#ifndef DBUG_OFF
+std::atomic<uint64_t> rocksdb_num_get_for_update_calls(0);
+#endif
@@ -635,7 +738,7 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
o->listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager));
o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;
- o->max_open_files = -2; // auto-tune to 50% open_files_limit
+ o->max_open_files = -2; // auto-tune to 50% open_files_limit
o->two_write_queues = true;
o->manual_wal_flush = true;
@@ -659,6 +762,15 @@ static TYPELIB write_policy_typelib = {array_elements(write_policy_names) - 1,
"write_policy_typelib",
write_policy_names, nullptr};
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+/* This array needs to be kept up to date with myrocks::read_free_rpl_type */
+static const char *read_free_rpl_names[] = {"OFF", "PK_ONLY", "PK_SK", NullS};
+
+static TYPELIB read_free_rpl_typelib = {array_elements(read_free_rpl_names) - 1,
+ "read_free_rpl_typelib",
+ read_free_rpl_names, nullptr};
+#endif
+
/* This enum needs to be kept up to date with rocksdb::InfoLogLevel */
static const char *info_log_level_names[] = {"debug_level", "info_level",
"warn_level", "error_level",
@@ -680,6 +792,23 @@ static void rocksdb_set_rocksdb_info_log_level(
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
+static void rocksdb_set_rocksdb_stats_level(THD *const thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ rocksdb_db_options->statistics->set_stats_level(
+ static_cast<const rocksdb::StatsLevel>(
+ *static_cast<const uint64_t *>(save)));
+ // Actual stats level is defined at rocksdb dbopt::statistics::stats_level_
+ // so adjusting rocksdb_stats_level here to make sure it points to
+ // the correct stats level.
+ rocksdb_stats_level = rocksdb_db_options->statistics->get_stats_level();
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
static void rocksdb_set_reset_stats(
my_core::THD *const /* unused */,
my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
@@ -804,7 +933,7 @@ static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG,
static MYSQL_THDVAR_BOOL(
commit_time_batch_for_recovery, PLUGIN_VAR_RQCMDARG,
"TransactionOptions::commit_time_batch_for_recovery for RocksDB", nullptr,
- nullptr, FALSE);
+ nullptr, TRUE);
static MYSQL_THDVAR_BOOL(
trace_sst_api, PLUGIN_VAR_RQCMDARG,
@@ -844,10 +973,11 @@ static MYSQL_THDVAR_STR(tmpdir, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
"Directory for temporary files during DDL operations.",
nullptr, nullptr, "");
+#define DEFAULT_SKIP_UNIQUE_CHECK_TABLES ".*"
static MYSQL_THDVAR_STR(
skip_unique_check_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
"Skip unique constraint checking for the specified tables", nullptr,
- nullptr, ".*");
+ nullptr, DEFAULT_SKIP_UNIQUE_CHECK_TABLES);
static MYSQL_THDVAR_BOOL(
commit_in_the_middle, PLUGIN_VAR_RQCMDARG,
@@ -861,11 +991,83 @@ static MYSQL_THDVAR_BOOL(
" Blind delete is disabled if the table has secondary key",
nullptr, nullptr, FALSE);
-static MYSQL_THDVAR_STR(
- read_free_rpl_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+
+static const char *DEFAULT_READ_FREE_RPL_TABLES = ".*";
+
+static int rocksdb_validate_read_free_rpl_tables(
+ THD *thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *save,
+ struct st_mysql_value *value) {
+ char buff[STRING_BUFFER_USUAL_SIZE];
+ int length = sizeof(buff);
+ const char *wlist_buf = value->val_str(value, buff, &length);
+ const auto wlist = wlist_buf ? wlist_buf : DEFAULT_READ_FREE_RPL_TABLES;
+
+#if defined(HAVE_PSI_INTERFACE)
+ Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables);
+#else
+ Regex_list_handler regex_handler;
+#endif
+
+ if (!regex_handler.set_patterns(wlist)) {
+ warn_about_bad_patterns(&regex_handler, "rocksdb_read_free_rpl_tables");
+ return HA_EXIT_FAILURE;
+ }
+
+ *static_cast<const char **>(save) = my_strdup(wlist, MYF(MY_WME));
+ return HA_EXIT_SUCCESS;
+}
+
+static void rocksdb_update_read_free_rpl_tables(
+ THD *thd MY_ATTRIBUTE((__unused__)),
+ struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *var_ptr,
+ const void *save) {
+ const auto wlist = *static_cast<const char *const *>(save);
+ DBUG_ASSERT(wlist != nullptr);
+
+ // This is bound to succeed since we've already checked for bad patterns in
+ // rocksdb_validate_read_free_rpl_tables
+ rdb_read_free_regex_handler.set_patterns(wlist);
+
+ // update all table defs
+ struct Rdb_read_free_rpl_updater : public Rdb_tables_scanner {
+ int add_table(Rdb_tbl_def *tdef) override {
+ tdef->check_and_set_read_free_rpl_table();
+ return HA_EXIT_SUCCESS;
+ }
+ } updater;
+ ddl_manager.scan_for_tables(&updater);
+
+ if (wlist == DEFAULT_READ_FREE_RPL_TABLES) {
+ // If running SET var = DEFAULT, then rocksdb_validate_read_free_rpl_tables
+ // isn't called, and memory is never allocated for the value. Allocate it
+ // here.
+ *static_cast<const char **>(var_ptr) = my_strdup(wlist, MYF(MY_WME));
+ } else {
+ // Otherwise, we just reuse the value allocated from
+ // rocksdb_validate_read_free_rpl_tables.
+ *static_cast<const char **>(var_ptr) = wlist;
+ }
+}
+
+static MYSQL_SYSVAR_STR(
+ read_free_rpl_tables, rocksdb_read_free_rpl_tables,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC /*| PLUGIN_VAR_ALLOCATED*/,
"List of tables that will use read-free replication on the slave "
"(i.e. not lookup a row during replication)",
- nullptr, nullptr, "");
+ rocksdb_validate_read_free_rpl_tables, rocksdb_update_read_free_rpl_tables,
+ DEFAULT_READ_FREE_RPL_TABLES);
+
+static MYSQL_SYSVAR_ENUM(
+ read_free_rpl, rocksdb_read_free_rpl,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
+ "Use read-free replication on the slave (i.e. no row lookup during "
+ "replication). Default is OFF, PK_SK will enable it on all tables with "
+ "primary key. PK_ONLY will enable it on tables where the only key is the "
+ "primary key (i.e. no secondary keys).",
+ nullptr, nullptr, read_free_rpl_type::OFF, &read_free_rpl_typelib);
+#endif
static MYSQL_THDVAR_BOOL(skip_bloom_filter_on_read, PLUGIN_VAR_RQCMDARG,
"Skip using bloom filter for reads", nullptr, nullptr,
@@ -1033,6 +1235,14 @@ static MYSQL_SYSVAR_UINT(
/* min */ (uint)rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords,
/* max */ (uint)rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0);
+static MYSQL_SYSVAR_UINT(
+ stats_level, rocksdb_stats_level, PLUGIN_VAR_RQCMDARG,
+ "Statistics Level for RocksDB. Default is 0 (kExceptHistogramOrTimers)",
+ nullptr, rocksdb_set_rocksdb_stats_level,
+ /* default */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers,
+ /* min */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers,
+ /* max */ (uint)rocksdb::StatsLevel::kAll, 0);
+
static MYSQL_SYSVAR_SIZE_T(compaction_readahead_size,
rocksdb_db_options->compaction_readahead_size,
PLUGIN_VAR_RQCMDARG,
@@ -1107,7 +1317,8 @@ static MYSQL_SYSVAR_ULONG(
persistent_cache_size_mb, rocksdb_persistent_cache_size_mb,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Size of cache in MB for BlockBasedTableOptions::persistent_cache "
- "for RocksDB", nullptr, nullptr, rocksdb_persistent_cache_size_mb,
+ "for RocksDB",
+ nullptr, nullptr, rocksdb_persistent_cache_size_mb,
/* min */ 0L, /* max */ ULONG_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(
@@ -1286,7 +1497,7 @@ static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size,
rocksdb_validate_set_block_cache_size, nullptr,
/* default */ RDB_DEFAULT_BLOCK_CACHE_SIZE,
/* min */ RDB_MIN_BLOCK_CACHE_SIZE,
- /* max */ LONGLONG_MAX,
+ /* max */ LLONG_MAX,
/* Block size */ RDB_MIN_BLOCK_CACHE_SIZE);
static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size,
@@ -1295,15 +1506,26 @@ static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size,
nullptr,
/* default */ 0,
/* min */ 0,
- /* max */ LONGLONG_MAX,
+ /* max */ LLONG_MAX,
/* Block size */ 0);
static MYSQL_SYSVAR_BOOL(
- use_clock_cache,
- rocksdb_use_clock_cache,
+ use_clock_cache, rocksdb_use_clock_cache,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Use ClockCache instead of default LRUCache for RocksDB",
- nullptr, nullptr, false);
+ "Use ClockCache instead of default LRUCache for RocksDB", nullptr, nullptr,
+ false);
+
+static MYSQL_SYSVAR_BOOL(cache_dump, rocksdb_cache_dump,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Include RocksDB block cache content in core dump.",
+ nullptr, nullptr, true);
+
+static MYSQL_SYSVAR_DOUBLE(cache_high_pri_pool_ratio,
+ rocksdb_cache_high_pri_pool_ratio,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Specify the size of block cache high-pri pool",
+ nullptr, nullptr, /* default */ 0.0, /* min */ 0.0,
+ /* max */ 1.0, 0);
static MYSQL_SYSVAR_BOOL(
cache_index_and_filter_blocks,
@@ -1313,6 +1535,14 @@ static MYSQL_SYSVAR_BOOL(
"BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB",
nullptr, nullptr, true);
+static MYSQL_SYSVAR_BOOL(
+ cache_index_and_filter_with_high_priority,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_tbl_options->cache_index_and_filter_blocks_with_high_priority),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "cache_index_and_filter_blocks_with_high_priority for RocksDB", nullptr,
+ nullptr, true);
+
// When pin_l0_filter_and_index_blocks_in_cache is true, RocksDB will use the
// LRU cache, but will always keep the filter & idndex block's handle checked
// out (=won't call ShardedLRUCache::Release), plus the parsed out objects
@@ -1441,10 +1671,10 @@ static MYSQL_SYSVAR_UINT(
nullptr, nullptr, 0, /* min */ 0, /* max */ INT_MAX, 0);
static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats,
- rocksdb_force_compute_memtable_stats,
- PLUGIN_VAR_RQCMDARG,
- "Force to always compute memtable stats",
- nullptr, nullptr, TRUE);
+ rocksdb_force_compute_memtable_stats,
+ PLUGIN_VAR_RQCMDARG,
+ "Force to always compute memtable stats", nullptr,
+ nullptr, TRUE);
static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime,
rocksdb_force_compute_memtable_stats_cachetime,
@@ -1464,6 +1694,10 @@ static MYSQL_SYSVAR_STR(compact_cf, rocksdb_compact_cf_name,
rocksdb_compact_column_family,
rocksdb_compact_column_family_stub, "");
+static MYSQL_SYSVAR_STR(delete_cf, rocksdb_delete_cf_name, PLUGIN_VAR_RQCMDARG,
+ "Delete column family", rocksdb_delete_column_family,
+ rocksdb_delete_column_family_stub, "");
+
static MYSQL_SYSVAR_STR(create_checkpoint, rocksdb_checkpoint_name,
PLUGIN_VAR_RQCMDARG, "Checkpoint directory",
rocksdb_create_checkpoint,
@@ -1535,6 +1769,12 @@ static MYSQL_SYSVAR_UINT(
"Maximum number of pending + ongoing number of manual compactions.",
nullptr, nullptr, /* default */ 10, /* min */ 0, /* max */ UINT_MAX, 0);
+static MYSQL_SYSVAR_BOOL(
+ rollback_on_timeout, rocksdb_rollback_on_timeout, PLUGIN_VAR_OPCMDARG,
+ "Whether to roll back the complete transaction or a single statement on "
+ "lock wait timeout (a single statement by default)",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_UINT(
debug_manual_compaction_delay, rocksdb_debug_manual_compaction_delay,
PLUGIN_VAR_RQCMDARG,
@@ -1626,7 +1866,7 @@ static MYSQL_SYSVAR_LONGLONG(
rocksdb_compaction_sequential_deletes_file_size, PLUGIN_VAR_RQCMDARG,
"Minimum file size required for compaction_sequential_deletes", nullptr,
rocksdb_set_compaction_options, 0L,
- /* min */ -1L, /* max */ LONGLONG_MAX, 0);
+ /* min */ -1L, /* max */ LLONG_MAX, 0);
static MYSQL_SYSVAR_BOOL(
compaction_sequential_deletes_count_sd,
@@ -1731,6 +1971,13 @@ static MYSQL_SYSVAR_BOOL(error_on_suboptimal_collation,
"collation is used",
nullptr, nullptr, TRUE);
+static MYSQL_SYSVAR_BOOL(
+ enable_insert_with_update_caching,
+ rocksdb_enable_insert_with_update_caching, PLUGIN_VAR_OPCMDARG,
+ "Whether to enable optimization where we cache the read from a failed "
+ "insertion attempt in INSERT ON DUPLICATE KEY UPDATE",
+ nullptr, nullptr, TRUE);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -1748,7 +1995,10 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(trace_sst_api),
MYSQL_SYSVAR(commit_in_the_middle),
MYSQL_SYSVAR(blind_delete_primary_key),
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
MYSQL_SYSVAR(read_free_rpl_tables),
+ MYSQL_SYSVAR(read_free_rpl),
+#endif
MYSQL_SYSVAR(bulk_load_size),
MYSQL_SYSVAR(merge_buf_size),
MYSQL_SYSVAR(enable_bulk_load_api),
@@ -1800,6 +2050,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(enable_thread_tracking),
MYSQL_SYSVAR(perf_context_level),
MYSQL_SYSVAR(wal_recovery_mode),
+ MYSQL_SYSVAR(stats_level),
MYSQL_SYSVAR(access_hint_on_compaction_start),
MYSQL_SYSVAR(new_table_reader_for_compaction_inputs),
MYSQL_SYSVAR(compaction_readahead_size),
@@ -1809,7 +2060,10 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(block_cache_size),
MYSQL_SYSVAR(sim_cache_size),
MYSQL_SYSVAR(use_clock_cache),
+ MYSQL_SYSVAR(cache_high_pri_pool_ratio),
+ MYSQL_SYSVAR(cache_dump),
MYSQL_SYSVAR(cache_index_and_filter_blocks),
+ MYSQL_SYSVAR(cache_index_and_filter_with_high_priority),
MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache),
MYSQL_SYSVAR(index_type),
MYSQL_SYSVAR(hash_index_allow_collision),
@@ -1838,6 +2092,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality),
MYSQL_SYSVAR(compact_cf),
+ MYSQL_SYSVAR(delete_cf),
MYSQL_SYSVAR(signal_drop_index_thread),
MYSQL_SYSVAR(pause_background_work),
MYSQL_SYSVAR(enable_2pc),
@@ -1883,10 +2138,13 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(debug_manual_compaction_delay),
MYSQL_SYSVAR(max_manual_compactions),
MYSQL_SYSVAR(manual_compaction_threads),
+ MYSQL_SYSVAR(rollback_on_timeout),
+
+ MYSQL_SYSVAR(enable_insert_with_update_caching),
nullptr};
-static rocksdb::WriteOptions
-rdb_get_rocksdb_write_options(my_core::THD *const thd) {
+static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
+ my_core::THD *const thd) {
rocksdb::WriteOptions opt;
opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
@@ -1943,19 +2201,6 @@ static int rocksdb_compact_column_family(THD *const thd,
///////////////////////////////////////////////////////////////////////////////////////////
-/**
- @brief
- Function we use in the creation of our hash to get key.
-*/
-
-uchar *
-Rdb_open_tables_map::get_hash_key(const Rdb_table_handler *const table_handler,
- size_t *const length,
- my_bool not_used MY_ATTRIBUTE((__unused__))) {
- *length = table_handler->m_table_name_length;
- return reinterpret_cast<uchar *>(table_handler->m_table_name);
-}
-
/*
Drop index thread's control
*/
@@ -2012,7 +2257,7 @@ class Rdb_snapshot_notifier : public rocksdb::TransactionNotifier {
void SnapshotCreated(const rocksdb::Snapshot *snapshot) override;
-public:
+ public:
Rdb_snapshot_notifier(const Rdb_snapshot_notifier &) = delete;
Rdb_snapshot_notifier &operator=(const Rdb_snapshot_notifier &) = delete;
@@ -2046,9 +2291,9 @@ String timeout_message(const char *command, const char *name1,
/* This is the base class for transactions when interacting with rocksdb.
-*/
+ */
class Rdb_transaction {
-protected:
+ protected:
ulonglong m_write_count = 0;
ulonglong m_insert_count = 0;
ulonglong m_update_count = 0;
@@ -2059,7 +2304,7 @@ protected:
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
-private:
+ private:
/*
Number of write operations this transaction had when we took the last
savepoint (the idea is not to take another savepoint if we haven't made
@@ -2067,7 +2312,7 @@ private:
*/
ulonglong m_writes_at_last_savepoint;
-protected:
+ protected:
protected:
THD *m_thd = nullptr;
@@ -2092,9 +2337,9 @@ protected:
// This should be used only when updating binlog information.
virtual rocksdb::WriteBatchBase *get_write_batch() = 0;
virtual bool commit_no_binlog() = 0;
- virtual rocksdb::Iterator *
- get_iterator(const rocksdb::ReadOptions &options,
- rocksdb::ColumnFamilyHandle *column_family) = 0;
+ virtual rocksdb::Iterator *get_iterator(
+ const rocksdb::ReadOptions &options,
+ rocksdb::ColumnFamilyHandle *column_family) = 0;
protected:
/*
@@ -2139,7 +2384,9 @@ protected:
String m_detailed_error;
int64_t m_snapshot_timestamp = 0;
bool m_ddl_transaction;
+#ifdef MARIAROCKS_NOT_YET
std::shared_ptr<Rdb_explicit_snapshot> m_explicit_snapshot;
+#endif
/*
Tracks the number of tables in use through external_lock.
@@ -2173,8 +2420,9 @@ protected:
RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex);
- for (auto it : s_tx_list)
+ for (auto it : s_tx_list) {
walker->process_tran(it);
+ }
RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex);
}
@@ -2194,7 +2442,8 @@ protected:
convert_error_code_to_mysql() does: force a statement
rollback before returning HA_ERR_LOCK_WAIT_TIMEOUT:
*/
- my_core::thd_mark_transaction_to_rollback(thd, false /*just statement*/);
+ my_core::thd_mark_transaction_to_rollback(
+ thd, static_cast<bool>(rocksdb_rollback_on_timeout));
m_detailed_error.copy(timeout_message(
"index", tbl_def->full_tablename().c_str(), kd.get_name().c_str()));
table_handler->m_lock_wait_timeout_counter.inc();
@@ -2216,9 +2465,10 @@ protected:
char user_host_buff[MAX_USER_HOST_SIZE + 1];
make_user_name(thd, user_host_buff);
// NO_LINT_DEBUG
- sql_print_warning("Got snapshot conflict errors: User: %s "
- "Query: %s",
- user_host_buff, thd->query());
+ sql_print_warning(
+ "Got snapshot conflict errors: User: %s "
+ "Query: %s",
+ user_host_buff, thd->query());
}
m_detailed_error = String(" (snapshot conflict)", system_charset_info);
table_handler->m_deadlock_counter.inc();
@@ -2315,8 +2565,9 @@ protected:
if (m_is_tx_failed) {
rollback();
res = false;
- } else
+ } else {
res = commit();
+ }
return res;
}
@@ -2367,7 +2618,7 @@ protected:
bool has_snapshot() const { return m_read_opts.snapshot != nullptr; }
-private:
+ private:
// The Rdb_sst_info structures we are currently loading. In a partitioned
// table this can have more than one entry
std::vector<std::shared_ptr<Rdb_sst_info>> m_curr_bulk_load;
@@ -2376,7 +2627,7 @@ private:
/* External merge sorts for bulk load: key ID -> merge sort instance */
std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
-public:
+ public:
int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf,
Rdb_index_merge **key_merge) {
int res;
@@ -2397,22 +2648,62 @@ public:
return HA_EXIT_SUCCESS;
}
- int finish_bulk_load(int print_client_error = true) {
- int rc = 0, rc2;
+ /* Finish bulk loading for all table handlers belongs to one connection */
+ int finish_bulk_load(bool *is_critical_error = nullptr,
+ int print_client_error = true) {
+ Ensure_cleanup cleanup([&]() {
+ // Always clear everything regardless of success/failure
+ m_curr_bulk_load.clear();
+ m_curr_bulk_load_tablename.clear();
+ m_key_merge.clear();
+ });
+
+ int rc = 0;
+ if (is_critical_error) {
+ *is_critical_error = true;
+ }
+
+ // PREPARE phase: finish all on-going bulk loading Rdb_sst_info and
+ // collect all Rdb_sst_commit_info containing (SST files, cf)
+ int rc2 = 0;
+ std::vector<Rdb_sst_info::Rdb_sst_commit_info> sst_commit_list;
+ sst_commit_list.reserve(m_curr_bulk_load.size());
+
+ for (auto &sst_info : m_curr_bulk_load) {
+ Rdb_sst_info::Rdb_sst_commit_info commit_info;
- std::vector<std::shared_ptr<Rdb_sst_info>>::iterator it;
- for (it = m_curr_bulk_load.begin(); it != m_curr_bulk_load.end(); it++) {
- rc2 = (*it)->commit(print_client_error);
- if (rc2 != 0 && rc == 0) {
+ // Commit the list of SST files and move it to the end of
+ // sst_commit_list, effectively transfer the ownership over
+ rc2 = sst_info->finish(&commit_info, print_client_error);
+ if (rc2 && rc == 0) {
+ // Don't return yet - make sure we finish all the SST infos
rc = rc2;
}
+
+ // Make sure we have work to do - we might be losing the race
+ if (rc2 == 0 && commit_info.has_work()) {
+ sst_commit_list.emplace_back(std::move(commit_info));
+ DBUG_ASSERT(!commit_info.has_work());
+ }
+ }
+
+ if (rc) {
+ return rc;
}
- m_curr_bulk_load.clear();
- m_curr_bulk_load_tablename.clear();
- DBUG_ASSERT(m_curr_bulk_load.size() == 0);
- // Flush the index_merge sort buffers
+ // MERGING Phase: Flush the index_merge sort buffers into SST files in
+ // Rdb_sst_info and collect all Rdb_sst_commit_info containing
+ // (SST files, cf)
if (!m_key_merge.empty()) {
+ Ensure_cleanup malloc_cleanup([]() {
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this
+ point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+ });
+
rocksdb::Slice merge_key;
rocksdb::Slice merge_val;
for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
@@ -2429,9 +2720,20 @@ public:
// be missed by the compaction filter and not be marked for
// removal. It is unclear how to lock the sql table from the storage
// engine to prevent modifications to it while bulk load is occurring.
- if (keydef == nullptr || table_name.empty()) {
- rc2 = HA_ERR_ROCKSDB_BULK_LOAD;
- break;
+ if (keydef == nullptr) {
+ if (is_critical_error) {
+ // We used to set the error but simply ignores it. This follows
+ // current behavior and we should revisit this later
+ *is_critical_error = false;
+ }
+ return HA_ERR_KEY_NOT_FOUND;
+ } else if (table_name.empty()) {
+ if (is_critical_error) {
+ // We used to set the error but simply ignores it. This follows
+ // current behavior and we should revisit this later
+ *is_critical_error = false;
+ }
+ return HA_ERR_NO_SUCH_TABLE;
}
const std::string &index_name = keydef->get_name();
Rdb_index_merge &rdb_merge = it->second;
@@ -2440,38 +2742,112 @@ public:
// "./database/table"
std::replace(table_name.begin(), table_name.end(), '.', '/');
table_name = "./" + table_name;
- Rdb_sst_info sst_info(rdb, table_name, index_name, rdb_merge.get_cf(),
- *rocksdb_db_options,
- THDVAR(get_thd(), trace_sst_api));
+ auto sst_info = std::make_shared<Rdb_sst_info>(
+ rdb, table_name, index_name, rdb_merge.get_cf(),
+ *rocksdb_db_options, THDVAR(get_thd(), trace_sst_api));
while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) {
- if ((rc2 = sst_info.put(merge_key, merge_val)) != 0) {
+ if ((rc2 = sst_info->put(merge_key, merge_val)) != 0) {
+ rc = rc2;
+
+ // Don't return yet - make sure we finish the sst_info
break;
}
}
- // rc2 == -1 => finished ok; rc2 > 0 => error
- if (rc2 > 0 || (rc2 = sst_info.commit(print_client_error)) != 0) {
- if (rc == 0) {
- rc = rc2;
- }
- break;
+ // -1 => no more items
+ if (rc2 != -1 && rc != 0) {
+ rc = rc2;
+ }
+
+ Rdb_sst_info::Rdb_sst_commit_info commit_info;
+ rc2 = sst_info->finish(&commit_info, print_client_error);
+ if (rc2 != 0 && rc == 0) {
+ // Only set the error from sst_info->finish if finish failed and we
+ // didn't fail before. In other words, we don't have finish's
+ // success mask earlier failures
+ rc = rc2;
+ }
+
+ if (rc) {
+ return rc;
+ }
+
+ if (commit_info.has_work()) {
+ sst_commit_list.emplace_back(std::move(commit_info));
+ DBUG_ASSERT(!commit_info.has_work());
}
}
- m_key_merge.clear();
+ }
- /*
- Explicitly tell jemalloc to clean up any unused dirty pages at this
- point.
- See https://reviews.facebook.net/D63723 for more details.
- */
- purge_all_jemalloc_arenas();
+ // Early return in case we lost the race completely and end up with no
+ // work at all
+ if (sst_commit_list.size() == 0) {
+ return rc;
}
+
+ // INGEST phase: Group all Rdb_sst_commit_info by cf (as they might
+ // have the same cf across different indexes) and call out to RocksDB
+ // to ingest all SST files in one atomic operation
+ rocksdb::IngestExternalFileOptions options;
+ options.move_files = true;
+ options.snapshot_consistency = false;
+ options.allow_global_seqno = false;
+ options.allow_blocking_flush = false;
+
+ std::map<rocksdb::ColumnFamilyHandle *, rocksdb::IngestExternalFileArg>
+ arg_map;
+
+ // Group by column_family
+ for (auto &commit_info : sst_commit_list) {
+ if (arg_map.find(commit_info.get_cf()) == arg_map.end()) {
+ rocksdb::IngestExternalFileArg arg;
+ arg.column_family = commit_info.get_cf(),
+ arg.external_files = commit_info.get_committed_files(),
+ arg.options = options;
+
+ arg_map.emplace(commit_info.get_cf(), arg);
+ } else {
+ auto &files = arg_map[commit_info.get_cf()].external_files;
+ files.insert(files.end(), commit_info.get_committed_files().begin(),
+ commit_info.get_committed_files().end());
+ }
+ }
+
+ std::vector<rocksdb::IngestExternalFileArg> args;
+ size_t file_count = 0;
+ for (auto &cf_files_pair : arg_map) {
+ args.push_back(cf_files_pair.second);
+ file_count += cf_files_pair.second.external_files.size();
+ }
+
+ const rocksdb::Status s = rdb->IngestExternalFiles(args);
+ if (THDVAR(m_thd, trace_sst_api)) {
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "SST Tracing: IngestExternalFile '%zu' files returned %s", file_count,
+ s.ok() ? "ok" : "not ok");
+ }
+
+ if (!s.ok()) {
+ if (print_client_error) {
+ Rdb_sst_info::report_error_msg(s, nullptr);
+ }
+ return HA_ERR_ROCKSDB_BULK_LOAD;
+ }
+
+ // COMMIT phase: mark everything as completed. This avoids SST file
+ // deletion kicking in. Otherwise SST files would get deleted if this
+ // entire operation is aborted
+ for (auto &commit_info : sst_commit_list) {
+ commit_info.commit();
+ }
+
return rc;
}
int start_bulk_load(ha_rocksdb *const bulk_load,
- std::shared_ptr<Rdb_sst_info> sst_info) {
+ std::shared_ptr<Rdb_sst_info> sst_info) {
/*
If we already have an open bulk load of a table and the name doesn't
match the current one, close out the currently running one. This allows
@@ -2484,8 +2860,6 @@ public:
bulk_load->get_table_basename() != m_curr_bulk_load_tablename) {
const auto res = finish_bulk_load();
if (res != HA_EXIT_SUCCESS) {
- m_curr_bulk_load.clear();
- m_curr_bulk_load_tablename.clear();
return res;
}
}
@@ -2535,12 +2909,10 @@ public:
inserts while inside a multi-statement transaction.
*/
bool flush_batch() {
- if (get_write_count() == 0)
- return false;
+ if (get_write_count() == 0) return false;
/* Commit the current transaction */
- if (commit_no_binlog())
- return true;
+ if (commit_no_binlog()) return true;
/* Start another one */
start_tx();
@@ -2552,7 +2924,7 @@ public:
std::max(m_auto_incr_map[gl_index_id], curr_id);
}
-#ifndef NDEBUG
+#ifndef DBUG_OFF
ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) {
if (m_auto_incr_map.count(gl_index_id) > 0) {
return m_auto_incr_map[gl_index_id];
@@ -2563,13 +2935,14 @@ public:
virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
- const rocksdb::Slice &value) = 0;
- virtual rocksdb::Status
- delete_key(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key) = 0;
- virtual rocksdb::Status
- single_delete(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key) = 0;
+ const rocksdb::Slice &value,
+ const bool assume_tracked) = 0;
+ virtual rocksdb::Status delete_key(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool assume_tracked) = 0;
+ virtual rocksdb::Status single_delete(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool assume_tracked) = 0;
virtual bool has_modifications() const = 0;
@@ -2585,25 +2958,23 @@ public:
virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
rocksdb::PinnableSlice *const value) const = 0;
- virtual rocksdb::Status
- get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
- bool exclusive) = 0;
-
- rocksdb::Iterator *
- get_iterator(rocksdb::ColumnFamilyHandle *const column_family,
- bool skip_bloom_filter, bool fill_cache,
- const rocksdb::Slice &eq_cond_lower_bound,
- const rocksdb::Slice &eq_cond_upper_bound,
- bool read_current = false, bool create_snapshot = true) {
+ virtual rocksdb::Status get_for_update(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
+ bool exclusive, const bool do_validate) = 0;
+
+ rocksdb::Iterator *get_iterator(
+ rocksdb::ColumnFamilyHandle *const column_family, bool skip_bloom_filter,
+ bool fill_cache, const rocksdb::Slice &eq_cond_lower_bound,
+ const rocksdb::Slice &eq_cond_upper_bound, bool read_current = false,
+ bool create_snapshot = true) {
// Make sure we are not doing both read_current (which implies we don't
// want a snapshot) and create_snapshot which makes sure we create
// a snapshot
DBUG_ASSERT(column_family != nullptr);
DBUG_ASSERT(!read_current || !create_snapshot);
- if (create_snapshot)
- acquire_snapshot(true);
+ if (create_snapshot) acquire_snapshot(true);
rocksdb::ReadOptions options = m_read_opts;
@@ -2635,25 +3006,33 @@ public:
entire transaction.
*/
do_set_savepoint();
- m_writes_at_last_savepoint= m_write_count;
+ m_writes_at_last_savepoint = m_write_count;
}
/*
Called when a "top-level" statement inside a transaction completes
successfully and its changes become part of the transaction's changes.
*/
- void make_stmt_savepoint_permanent() {
-
+ int make_stmt_savepoint_permanent() {
// Take another RocksDB savepoint only if we had changes since the last
// one. This is very important for long transactions doing lots of
// SELECTs.
- if (m_writes_at_last_savepoint != m_write_count)
- {
+ if (m_writes_at_last_savepoint != m_write_count) {
+ rocksdb::WriteBatchBase *batch = get_write_batch();
+ rocksdb::Status status = rocksdb::Status::NotFound();
+ while ((status = batch->PopSavePoint()) == rocksdb::Status::OK()) {
+ }
+
+ if (status != rocksdb::Status::NotFound()) {
+ return HA_EXIT_FAILURE;
+ }
+
do_set_savepoint();
- m_writes_at_last_savepoint= m_write_count;
+ m_writes_at_last_savepoint = m_write_count;
}
- }
+ return HA_EXIT_SUCCESS;
+ }
/*
Rollback to the savepoint we've set before the last statement
@@ -2669,7 +3048,7 @@ public:
statement start) because setting a savepoint is cheap.
*/
do_set_savepoint();
- m_writes_at_last_savepoint= m_write_count;
+ m_writes_at_last_savepoint = m_write_count;
}
}
@@ -2733,10 +3112,11 @@ class Rdb_transaction_impl : public Rdb_transaction {
rocksdb::Transaction *m_rocksdb_tx = nullptr;
rocksdb::Transaction *m_rocksdb_reuse_tx = nullptr;
-public:
+ public:
void set_lock_timeout(int timeout_sec_arg) override {
- if (m_rocksdb_tx)
+ if (m_rocksdb_tx) {
m_rocksdb_tx->SetLockTimeout(rdb_convert_sec_to_ms(m_timeout_sec));
+ }
}
void set_sync(bool sync) override {
@@ -2753,7 +3133,7 @@ public:
virtual bool is_writebatch_trx() const override { return false; }
-private:
+ private:
void release_tx(void) {
// We are done with the current active transaction object. Preserve it
// for later reuse.
@@ -2803,7 +3183,7 @@ private:
goto error;
}
-error:
+ error:
/* Save the transaction object to be reused */
release_tx();
@@ -2817,7 +3197,7 @@ error:
return res;
}
-public:
+ public:
void rollback() override {
m_write_count = 0;
m_insert_count = 0;
@@ -2884,39 +3264,42 @@ public:
m_read_opts.snapshot = nullptr;
}
- if (need_clear && m_rocksdb_tx != nullptr)
- m_rocksdb_tx->ClearSnapshot();
+ if (need_clear && m_rocksdb_tx != nullptr) m_rocksdb_tx->ClearSnapshot();
}
bool has_snapshot() { return m_read_opts.snapshot != nullptr; }
rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key,
- const rocksdb::Slice &value) override {
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ const bool assume_tracked) override {
++m_write_count;
++m_lock_count;
- if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks)
+ if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) {
return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
- return m_rocksdb_tx->Put(column_family, key, value);
+ }
+ return m_rocksdb_tx->Put(column_family, key, value, assume_tracked);
}
rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key) override {
+ const rocksdb::Slice &key,
+ const bool assume_tracked) override {
++m_write_count;
++m_lock_count;
- if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks)
+ if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) {
return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
- return m_rocksdb_tx->Delete(column_family, key);
+ }
+ return m_rocksdb_tx->Delete(column_family, key, assume_tracked);
}
- rocksdb::Status
- single_delete(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key) override {
+ rocksdb::Status single_delete(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool assume_tracked) override {
++m_write_count;
++m_lock_count;
- if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks)
+ if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) {
return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
- return m_rocksdb_tx->SingleDelete(column_family, key);
+ }
+ return m_rocksdb_tx->SingleDelete(column_family, key, assume_tracked);
}
bool has_modifications() const override {
@@ -2952,23 +3335,39 @@ public:
return m_rocksdb_tx->Get(m_read_opts, column_family, key, value);
}
- rocksdb::Status
- get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
- bool exclusive) override {
- if (++m_lock_count > m_max_row_locks)
+ rocksdb::Status get_for_update(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
+ bool exclusive, const bool do_validate) override {
+ if (++m_lock_count > m_max_row_locks) {
return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit);
+ }
if (value != nullptr) {
value->Reset();
}
- return m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value,
- exclusive);
+ rocksdb::Status s;
+ // If snapshot is null, pass it to GetForUpdate and snapshot is
+ // initialized there. Snapshot validation is skipped in that case.
+ if (m_read_opts.snapshot == nullptr || do_validate) {
+ s = m_rocksdb_tx->GetForUpdate(
+ m_read_opts, column_family, key, value, exclusive,
+ m_read_opts.snapshot ? do_validate : false);
+ } else {
+ // If snapshot is set, and if skipping validation,
+ // call GetForUpdate without validation and set back old snapshot
+ auto saved_snapshot = m_read_opts.snapshot;
+ m_read_opts.snapshot = nullptr;
+ s = m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value,
+ exclusive, false);
+ m_read_opts.snapshot = saved_snapshot;
+ }
+ return s;
}
- rocksdb::Iterator *
- get_iterator(const rocksdb::ReadOptions &options,
- rocksdb::ColumnFamilyHandle *const column_family) override {
+ rocksdb::Iterator *get_iterator(
+ const rocksdb::ReadOptions &options,
+ rocksdb::ColumnFamilyHandle *const column_family) override {
global_stats.queries[QUERIES_RANGE].inc();
return m_rocksdb_tx->GetIterator(options, column_family);
}
@@ -3013,10 +3412,9 @@ public:
m_ddl_transaction = false;
}
- /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints */
- void do_set_savepoint() override {
- m_rocksdb_tx->SetSavePoint();
- }
+ /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints
+ */
+ void do_set_savepoint() override { m_rocksdb_tx->SetSavePoint(); }
void do_rollback_to_savepoint() override {
m_rocksdb_tx->RollbackToSavePoint();
@@ -3048,14 +3446,14 @@ public:
const rocksdb::Snapshot *const cur_snapshot = m_rocksdb_tx->GetSnapshot();
if (org_snapshot != cur_snapshot) {
- if (org_snapshot != nullptr)
- m_snapshot_timestamp = 0;
+ if (org_snapshot != nullptr) m_snapshot_timestamp = 0;
m_read_opts.snapshot = cur_snapshot;
- if (cur_snapshot != nullptr)
+ if (cur_snapshot != nullptr) {
rdb->GetEnv()->GetCurrentTime(&m_snapshot_timestamp);
- else
+ } else {
m_is_delayed_snapshot = true;
+ }
}
}
}
@@ -3066,7 +3464,7 @@ public:
m_notifier = std::make_shared<Rdb_snapshot_notifier>(this);
}
- virtual ~Rdb_transaction_impl() {
+ virtual ~Rdb_transaction_impl() override {
rollback();
// Theoretically the notifier could outlive the Rdb_transaction_impl
@@ -3098,7 +3496,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
m_ddl_transaction = false;
}
-private:
+ private:
bool prepare(const rocksdb::TransactionName &name) override { return true; }
bool commit_no_binlog() override {
@@ -3122,7 +3520,7 @@ private:
res = true;
goto error;
}
-error:
+ error:
reset();
m_write_count = 0;
@@ -3135,16 +3533,12 @@ error:
}
/* Implementations of do_*savepoint based on rocksdB::WriteBatch savepoints */
- void do_set_savepoint() override {
- m_batch->SetSavePoint();
- }
+ void do_set_savepoint() override { m_batch->SetSavePoint(); }
- void do_rollback_to_savepoint() override {
- m_batch->RollbackToSavePoint();
- }
+ void do_rollback_to_savepoint() override { m_batch->RollbackToSavePoint(); }
-public:
+ public:
bool is_writebatch_trx() const override { return true; }
void set_lock_timeout(int timeout_sec_arg) override {
@@ -3172,8 +3566,7 @@ public:
}
void acquire_snapshot(bool acquire_now) override {
- if (m_read_opts.snapshot == nullptr)
- snapshot_created(rdb->GetSnapshot());
+ if (m_read_opts.snapshot == nullptr) snapshot_created(rdb->GetSnapshot());
}
void release_snapshot() override {
@@ -3184,8 +3577,8 @@ public:
}
rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key,
- const rocksdb::Slice &value) override {
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ const bool assume_tracked) override {
++m_write_count;
m_batch->Put(column_family, key, value);
// Note Put/Delete in write batch doesn't return any error code. We simply
@@ -3194,15 +3587,16 @@ public:
}
rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key) override {
+ const rocksdb::Slice &key,
+ const bool assume_tracked) override {
++m_write_count;
m_batch->Delete(column_family, key);
return rocksdb::Status::OK();
}
- rocksdb::Status
- single_delete(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key) override {
+ rocksdb::Status single_delete(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, const bool /* assume_tracked */) override {
++m_write_count;
m_batch->SingleDelete(column_family, key);
return rocksdb::Status::OK();
@@ -3227,10 +3621,10 @@ public:
value);
}
- rocksdb::Status
- get_for_update(rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
- bool exclusive) override {
+ rocksdb::Status get_for_update(
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
+ bool /* exclusive */, const bool /* do_validate */) override {
if (value == nullptr) {
rocksdb::PinnableSlice pin_val;
rocksdb::Status s = get(column_family, key, &pin_val);
@@ -3241,9 +3635,9 @@ public:
return get(column_family, key, value);
}
- rocksdb::Iterator *
- get_iterator(const rocksdb::ReadOptions &options,
- rocksdb::ColumnFamilyHandle *const column_family) override {
+ rocksdb::Iterator *get_iterator(
+ const rocksdb::ReadOptions &options,
+ rocksdb::ColumnFamilyHandle *const /* column_family */) override {
const auto it = rdb->NewIterator(options);
return m_batch->NewIteratorWithBase(it);
}
@@ -3264,8 +3658,7 @@ public:
void start_stmt() override {}
void rollback_stmt() override {
- if (m_batch)
- rollback_to_stmt_savepoint();
+ if (m_batch) rollback_to_stmt_savepoint();
}
explicit Rdb_writebatch_impl(THD *const thd)
@@ -3274,7 +3667,7 @@ public:
true);
}
- virtual ~Rdb_writebatch_impl() {
+ virtual ~Rdb_writebatch_impl() override {
rollback();
delete m_batch;
}
@@ -3332,7 +3725,7 @@ class Rdb_perf_context_guard {
}
};
-} // anonymous namespace
+} // anonymous namespace
/*
TODO: maybe, call this in external_lock() and store in ha_rocksdb..
@@ -3344,12 +3737,10 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) {
if (tx == nullptr) {
bool rpl_skip_tx_api= false; // MARIAROCKS_NOT_YET.
if ((rpl_skip_tx_api && thd->rgi_slave) ||
- false /* MARIAROCKS_NOT_YET: THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave)*/)
+ (THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave))
{
tx = new Rdb_writebatch_impl(thd);
- }
- else
- {
+ } else {
tx = new Rdb_transaction_impl(thd);
}
tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks));
@@ -3368,12 +3759,14 @@ static Rdb_transaction *get_or_create_tx(THD *const thd) {
static int rocksdb_close_connection(handlerton *const hton, THD *const thd) {
Rdb_transaction *tx = get_tx_from_thd(thd);
if (tx != nullptr) {
- int rc = tx->finish_bulk_load(false);
- if (rc != 0) {
+ bool is_critical_error;
+ int rc = tx->finish_bulk_load(&is_critical_error, false);
+ if (rc != 0 && is_critical_error) {
// NO_LINT_DEBUG
- sql_print_error("RocksDB: Error %d finalizing last SST file while "
- "disconnecting",
- rc);
+ sql_print_error(
+ "RocksDB: Error %d finalizing last SST file while "
+ "disconnecting",
+ rc);
}
delete tx;
@@ -3514,9 +3907,9 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx)
}
DEBUG_SYNC(thd, "rocksdb.prepared");
- }
- else
+ } else {
tx->make_stmt_savepoint_permanent();
+ }
return HA_EXIT_SUCCESS;
}
@@ -3557,9 +3950,8 @@ static int rocksdb_commit_by_xid(handlerton *const hton, XID *const xid) {
DBUG_RETURN(HA_EXIT_SUCCESS);
}
-static int
-rocksdb_rollback_by_xid(handlerton *const hton MY_ATTRIBUTE((__unused__)),
- XID *const xid) {
+static int rocksdb_rollback_by_xid(
+ handlerton *const hton MY_ATTRIBUTE((__unused__)), XID *const xid) {
DBUG_ENTER_FUNC();
DBUG_ASSERT(hton != nullptr);
@@ -3605,6 +3997,7 @@ static void rdb_xid_from_string(const std::string &src, XID *const dst) {
DBUG_ASSERT(dst->gtrid_length >= 0 && dst->gtrid_length <= MAXGTRIDSIZE);
DBUG_ASSERT(dst->bqual_length >= 0 && dst->bqual_length <= MAXBQUALSIZE);
+ memset(dst->data, 0, XIDDATASIZE);
src.copy(dst->data, (dst->gtrid_length) + (dst->bqual_length),
RDB_XIDHDR_LEN);
}
@@ -3629,13 +4022,16 @@ static int rocksdb_recover(handlerton* hton, XID* xid_list, uint len)
if (is_binlog_advanced(binlog_file, *binlog_pos, file_buf, pos)) {
memcpy(binlog_file, file_buf, FN_REFLEN + 1);
*binlog_pos = pos;
- fprintf(stderr, "RocksDB: Last binlog file position %llu,"
- " file name %s\n",
+ // NO_LINT_DEBUG
+ fprintf(stderr,
+ "RocksDB: Last binlog file position %llu,"
+ " file name %s\n",
pos, file_buf);
if (*gtid_buf) {
global_sid_lock->rdlock();
binlog_max_gtid->parse(global_sid_map, gtid_buf);
global_sid_lock->unlock();
+ // NO_LINT_DEBUG
fprintf(stderr, "RocksDB: Last MySQL Gtid %s\n", gtid_buf);
}
}
@@ -3733,8 +4129,8 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
if (tx != nullptr) {
- if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT |
- OPTION_BEGIN))) {
+ if (commit_tx || (!my_core::thd_test_options(
+ thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
/*
This will not add anything to commit_latency_stats, and this is correct
right?
@@ -3865,7 +4261,7 @@ static std::string format_string(const char *const format, ...) {
char *buff = static_buff;
std::unique_ptr<char[]> dynamic_buff = nullptr;
- len++; // Add one for null terminator
+ len++; // Add one for null terminator
// for longer output use an allocated buffer
if (static_cast<uint>(len) > sizeof(static_buff)) {
@@ -3890,7 +4286,7 @@ static std::string format_string(const char *const format, ...) {
}
class Rdb_snapshot_status : public Rdb_tx_list_walker {
-private:
+ private:
std::string m_data;
static std::string current_timestamp(void) {
@@ -3924,9 +4320,8 @@ private:
"=========================================\n";
}
- static Rdb_deadlock_info::Rdb_dl_trx_info
- get_dl_txn_info(const rocksdb::DeadlockInfo &txn,
- const GL_INDEX_ID &gl_index_id) {
+ static Rdb_deadlock_info::Rdb_dl_trx_info get_dl_txn_info(
+ const rocksdb::DeadlockInfo &txn, const GL_INDEX_ID &gl_index_id) {
Rdb_deadlock_info::Rdb_dl_trx_info txn_data;
txn_data.trx_id = txn.m_txn_id;
@@ -3953,13 +4348,12 @@ private:
return txn_data;
}
- static Rdb_deadlock_info
- get_dl_path_trx_info(const rocksdb::DeadlockPath &path_entry) {
+ static Rdb_deadlock_info get_dl_path_trx_info(
+ const rocksdb::DeadlockPath &path_entry) {
Rdb_deadlock_info deadlock_info;
- for (auto it = path_entry.path.begin(); it != path_entry.path.end();
- it++) {
- auto txn = *it;
+ for (auto it = path_entry.path.begin(); it != path_entry.path.end(); it++) {
+ const auto &txn = *it;
const GL_INDEX_ID gl_index_id = {
txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
txn.m_waiting_key.c_str()))};
@@ -3968,7 +4362,7 @@ private:
DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
/* print the first txn in the path to display the full deadlock cycle */
if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
- auto deadlocking_txn = *(path_entry.path.end() - 1);
+ const auto &deadlocking_txn = *(path_entry.path.end() - 1);
deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id;
deadlock_info.deadlock_time = path_entry.deadlock_time;
}
@@ -3997,7 +4391,7 @@ private:
#endif
m_data += format_string(
"---SNAPSHOT, ACTIVE %lld sec\n"
- "%s\n"
+ "%s\n"
"lock count %llu, write count %llu\n"
"insert count %llu, update count %llu, delete count %llu\n",
(longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(),
@@ -4010,19 +4404,21 @@ private:
auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
m_data += "----------LATEST DETECTED DEADLOCKS----------\n";
- for (auto path_entry : dlock_buffer) {
+ for (const auto &path_entry : dlock_buffer) {
std::string path_data;
if (path_entry.limit_exceeded) {
path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
} else {
- path_data += "\n*** DEADLOCK PATH\n"
- "=========================================\n";
+ path_data +=
+ "\n*** DEADLOCK PATH\n"
+ "=========================================\n";
const auto dl_info = get_dl_path_trx_info(path_entry);
const auto deadlock_time = dl_info.deadlock_time;
for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) {
- const auto trx_info = *it;
+ const auto &trx_info = *it;
path_data += format_string(
- "TIMESTAMP: %" PRId64 "\n"
+ "TIMESTAMP: %" PRId64
+ "\n"
"TRANSACTION ID: %u\n"
"COLUMN FAMILY NAME: %s\n"
"WAITING KEY: %s\n"
@@ -4037,9 +4433,9 @@ private:
path_data += "---------------WAITING FOR---------------\n";
}
}
- path_data +=
- format_string("\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n",
- dl_info.victim_trx_id);
+ path_data += format_string(
+ "\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n",
+ dl_info.victim_trx_id);
}
m_data += path_data;
}
@@ -4048,7 +4444,7 @@ private:
std::vector<Rdb_deadlock_info> get_deadlock_info() {
std::vector<Rdb_deadlock_info> deadlock_info;
auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
- for (auto path_entry : dlock_buffer) {
+ for (const auto &path_entry : dlock_buffer) {
if (!path_entry.limit_exceeded) {
deadlock_info.push_back(get_dl_path_trx_info(path_entry));
}
@@ -4063,10 +4459,10 @@ private:
* out relevant information for information_schema.rocksdb_trx
*/
class Rdb_trx_info_aggregator : public Rdb_tx_list_walker {
-private:
+ private:
std::vector<Rdb_trx_info> *m_trx_info;
-public:
+ public:
explicit Rdb_trx_info_aggregator(std::vector<Rdb_trx_info> *const trx_info)
: m_trx_info(trx_info) {}
@@ -4197,9 +4593,10 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
// sure that output will look unified.
DBUG_ASSERT(commit_latency_stats != nullptr);
- snprintf(buf, sizeof(buf), "rocksdb.commit_latency statistics "
- "Percentiles :=> 50 : %.2f 95 : %.2f "
- "99 : %.2f 100 : %.2f\n",
+ snprintf(buf, sizeof(buf),
+ "rocksdb.commit_latency statistics "
+ "Percentiles :=> 50 : %.2f 95 : %.2f "
+ "99 : %.2f 100 : %.2f\n",
commit_latency_stats->Percentile(50),
commit_latency_stats->Percentile(95),
commit_latency_stats->Percentile(99),
@@ -4221,7 +4618,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
}
if (rdb->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)) {
- snprintf(buf, sizeof(buf), "rocksdb.actual_delayed_write_rate "
+ snprintf(buf, sizeof(buf),
+ "rocksdb.actual_delayed_write_rate "
"COUNT : %llu\n",
(ulonglong)v);
str.append(buf);
@@ -4309,6 +4706,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list);
if (!s.ok()) {
+ // NO_LINT_DEBUG
sql_print_error("RocksDB: Returned error (%s) from GetThreadList.\n",
s.ToString().c_str());
res |= true;
@@ -4325,37 +4723,23 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
"\noperation_type: " + it.GetOperationName(it.operation_type) +
"\noperation_stage: " +
it.GetOperationStageName(it.operation_stage) +
- "\nelapsed_time_ms: " +
- it.MicrosToString(it.op_elapsed_micros);
+ "\nelapsed_time_ms: " + it.MicrosToString(it.op_elapsed_micros);
- for (auto &it_props :
- it.InterpretOperationProperties(it.operation_type,
- it.op_properties)) {
+ for (auto &it_props : it.InterpretOperationProperties(
+ it.operation_type, it.op_properties)) {
str += "\n" + it_props.first + ": " + std::to_string(it_props.second);
}
str += "\nstate_type: " + it.GetStateName(it.state_type);
- res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id),
- str, stat_print);
+ res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id), str,
+ stat_print);
}
}
#ifdef MARIAROCKS_NOT_YET
/* Explicit snapshot information */
- str.clear();
- {
- std::lock_guard<std::mutex> lock(explicit_snapshot_mutex);
- for (const auto &elem : explicit_snapshots) {
- const auto &ss = elem.second.lock();
- DBUG_ASSERT(ss != nullptr);
- const auto &info = ss->ss_info;
- str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) +
- "\nBinlog File: " + info.binlog_file +
- "\nBinlog Pos: " + std::to_string(info.binlog_pos) +
- "\nGtid Executed: " + info.gtid_executed + "\n";
- }
- }
+ str = Rdb_explicit_snapshot::dump_snapshots();
#endif
if (!str.empty()) {
@@ -4390,38 +4774,38 @@ static bool rocksdb_explicit_snapshot(
snapshot_info_st *ss_info) /*!< out: Snapshot information */
{
switch (ss_info->op) {
- case snapshot_operation::SNAPSHOT_CREATE: {
- if (mysql_bin_log_is_open()) {
- mysql_bin_log_lock_commits(ss_info);
+ case snapshot_operation::SNAPSHOT_CREATE: {
+ if (mysql_bin_log_is_open()) {
+ mysql_bin_log_lock_commits(ss_info);
+ }
+ auto s = Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot());
+ if (mysql_bin_log_is_open()) {
+ mysql_bin_log_unlock_commits(ss_info);
+ }
+
+ thd->set_explicit_snapshot(s);
+ return s == nullptr;
}
- auto s = Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot());
- if (mysql_bin_log_is_open()) {
- mysql_bin_log_unlock_commits(ss_info);
+ case snapshot_operation::SNAPSHOT_ATTACH: {
+ auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id);
+ if (!s) {
+ return true;
+ }
+ *ss_info = s->ss_info;
+ thd->set_explicit_snapshot(s);
+ return false;
}
-
- thd->set_explicit_snapshot(s);
- return s == nullptr;
- }
- case snapshot_operation::SNAPSHOT_ATTACH: {
- auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id);
- if (!s) {
- return true;
+ case snapshot_operation::SNAPSHOT_RELEASE: {
+ if (!thd->get_explicit_snapshot()) {
+ return true;
+ }
+ *ss_info = thd->get_explicit_snapshot()->ss_info;
+ thd->set_explicit_snapshot(nullptr);
+ return false;
}
- *ss_info = s->ss_info;
- thd->set_explicit_snapshot(s);
- return false;
- }
- case snapshot_operation::SNAPSHOT_RELEASE: {
- if (!thd->get_explicit_snapshot()) {
+ default:
+ DBUG_ASSERT(false);
return true;
- }
- *ss_info = thd->get_explicit_snapshot()->ss_info;
- thd->set_explicit_snapshot(nullptr);
- return false;
- }
- default:
- DBUG_ASSERT(false);
- return true;
}
return true;
}
@@ -4567,7 +4951,7 @@ static int rocksdb_start_tx_with_shared_read_view(
// case: an explicit snapshot was not assigned to this transaction
if (!tx->m_explicit_snapshot) {
tx->m_explicit_snapshot =
- Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot);
+ Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot);
if (!tx->m_explicit_snapshot) {
my_printf_error(ER_UNKNOWN_ERROR, "Could not create snapshot", MYF(0));
error = HA_EXIT_FAILURE;
@@ -4611,9 +4995,8 @@ static int rocksdb_rollback_to_savepoint(handlerton *const hton, THD *const thd,
return tx->rollback_to_savepoint(savepoint);
}
-static bool
-rocksdb_rollback_to_savepoint_can_release_mdl(handlerton *const hton,
- THD *const thd) {
+static bool rocksdb_rollback_to_savepoint_can_release_mdl(
+ handlerton *const /* hton */, THD *const /* thd */) {
return true;
}
@@ -4661,7 +5044,7 @@ static void rocksdb_update_table_stats(
/* Function needs to return void because of the interface and we've
* detected an error which shouldn't happen. There's no way to let
* caller know that something failed.
- */
+ */
SHIP_ASSERT(false);
return;
}
@@ -4741,8 +5124,9 @@ static rocksdb::Status check_rocksdb_options_compatibility(
}
if (loaded_cf_descs.size() != cf_descr.size()) {
- return rocksdb::Status::NotSupported("Mismatched size of column family "
- "descriptors.");
+ return rocksdb::Status::NotSupported(
+ "Mismatched size of column family "
+ "descriptors.");
}
// Please see RocksDB documentation for more context about why we need to set
@@ -4792,17 +5176,22 @@ static int rocksdb_init_func(void *const p) {
}
if (rdb_check_rocksdb_corruption()) {
- sql_print_error("RocksDB: There was a corruption detected in RockDB files. "
- "Check error log emitted earlier for more details.");
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: There was a corruption detected in RocksDB files. "
+ "Check error log emitted earlier for more details.");
if (rocksdb_allow_to_start_after_corruption) {
+ // NO_LINT_DEBUG
sql_print_information(
"RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent "
"server operating if RocksDB corruption is detected.");
} else {
- sql_print_error("RocksDB: The server will exit normally and stop restart "
- "attempts. Remove %s file from data directory and "
- "start mysqld manually.",
- rdb_corruption_marker_file_name().c_str());
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: The server will exit normally and stop restart "
+ "attempts. Remove %s file from data directory and "
+ "start mysqld manually.",
+ rdb_corruption_marker_file_name().c_str());
exit(0);
}
}
@@ -4813,8 +5202,10 @@ static int rocksdb_init_func(void *const p) {
init_rocksdb_psi_keys();
rocksdb_hton = (handlerton *)p;
- mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &rdb_open_tables.m_mutex,
- MY_MUTEX_INIT_FAST);
+
+ rdb_open_tables.init();
+ Ensure_cleanup rdb_open_tables_cleanup([]() { rdb_open_tables.free(); });
+
#ifdef HAVE_PSI_INTERFACE
rdb_bg_thread.init(rdb_signal_bg_psi_mutex_key, rdb_signal_bg_psi_cond_key);
rdb_drop_idx_thread.init(rdb_signal_drop_idx_psi_mutex_key,
@@ -4885,6 +5276,8 @@ static int rocksdb_init_func(void *const p) {
/*
Not needed in MariaDB:
rocksdb_hton->flush_logs = rocksdb_flush_wal;
+ rocksdb_hton->handle_single_table_select = rocksdb_handle_single_table_select;
+
*/
rocksdb_hton->flags = HTON_TEMPORARY_NOT_SUPPORTED |
@@ -4894,16 +5287,25 @@ static int rocksdb_init_func(void *const p) {
DBUG_ASSERT(!mysqld_embedded);
if (rocksdb_db_options->max_open_files > (long)open_files_limit) {
- sql_print_information("RocksDB: rocksdb_max_open_files should not be "
- "greater than the open_files_limit, effective value "
- "of rocksdb_max_open_files is being set to "
- "open_files_limit / 2.");
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: rocksdb_max_open_files should not be "
+ "greater than the open_files_limit, effective value "
+ "of rocksdb_max_open_files is being set to "
+ "open_files_limit / 2.");
rocksdb_db_options->max_open_files = open_files_limit / 2;
} else if (rocksdb_db_options->max_open_files == -2) {
rocksdb_db_options->max_open_files = open_files_limit / 2;
}
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+ rdb_read_free_regex_handler.set_patterns(DEFAULT_READ_FREE_RPL_TABLES);
+#endif
+
rocksdb_stats = rocksdb::CreateDBStatistics();
+ rocksdb_stats->set_stats_level(
+ static_cast<rocksdb::StatsLevel>(rocksdb_stats_level));
+ rocksdb_stats_level = rocksdb_stats->get_stats_level();
rocksdb_db_options->statistics = rocksdb_stats;
if (rocksdb_rate_limiter_bytes_per_sec != 0) {
@@ -4937,13 +5339,15 @@ static int rocksdb_init_func(void *const p) {
rocksdb_db_options->use_direct_reads) {
// allow_mmap_reads implies !use_direct_reads and RocksDB will not open if
// mmap_reads and direct_reads are both on. (NO_LINT_DEBUG)
- sql_print_error("RocksDB: Can't enable both use_direct_reads "
- "and allow_mmap_reads\n");
+ sql_print_error(
+ "RocksDB: Can't enable both use_direct_reads "
+ "and allow_mmap_reads\n");
DBUG_RETURN(HA_EXIT_FAILURE);
}
// Check whether the filesystem backing rocksdb_datadir allows O_DIRECT
- if (rocksdb_db_options->use_direct_reads) {
+ if (rocksdb_db_options->use_direct_reads ||
+ rocksdb_db_options->use_direct_io_for_flush_and_compaction) {
rocksdb::EnvOptions soptions;
rocksdb::Status check_status;
rocksdb::Env *const env = rocksdb_db_options->env;
@@ -4964,9 +5368,11 @@ static int rocksdb_init_func(void *const p) {
}
if (!check_status.ok()) {
- sql_print_error("RocksDB: Unable to use direct io in rocksdb-datadir:"
- "(%s)", check_status.getState());
- rdb_open_tables.free_hash();
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Unable to use direct io in rocksdb-datadir:"
+ "(%s)",
+ check_status.getState());
DBUG_RETURN(HA_EXIT_FAILURE);
}
}
@@ -4974,17 +5380,19 @@ static int rocksdb_init_func(void *const p) {
if (rocksdb_db_options->allow_mmap_writes &&
rocksdb_db_options->use_direct_io_for_flush_and_compaction) {
// See above comment for allow_mmap_reads. (NO_LINT_DEBUG)
- sql_print_error("RocksDB: Can't enable both "
- "use_direct_io_for_flush_and_compaction and "
- "allow_mmap_writes\n");
+ sql_print_error(
+ "RocksDB: Can't enable both "
+ "use_direct_io_for_flush_and_compaction and "
+ "allow_mmap_writes\n");
DBUG_RETURN(HA_EXIT_FAILURE);
}
if (rocksdb_db_options->allow_mmap_writes &&
rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
// NO_LINT_DEBUG
- sql_print_error("RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 "
- "to use allow_mmap_writes");
+ sql_print_error(
+ "RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 "
+ "to use allow_mmap_writes");
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -5011,15 +5419,19 @@ static int rocksdb_init_func(void *const p) {
#endif
) {
sql_print_information("RocksDB: Got ENOENT when listing column families");
+
+ // NO_LINT_DEBUG
sql_print_information(
"RocksDB: assuming that we're creating a new database");
} else {
rdb_log_status_error(status, "Error listing column families");
DBUG_RETURN(HA_EXIT_FAILURE);
}
- } else
+ } else {
+ // NO_LINT_DEBUG
sql_print_information("RocksDB: %ld column families found",
cf_names.size());
+ }
std::vector<rocksdb::ColumnFamilyDescriptor> cf_descr;
std::vector<rocksdb::ColumnFamilyHandle *> cf_handles;
@@ -5028,9 +5440,33 @@ static int rocksdb_init_func(void *const p) {
(rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type;
if (!rocksdb_tbl_options->no_block_cache) {
- std::shared_ptr<rocksdb::Cache> block_cache = rocksdb_use_clock_cache
- ? rocksdb::NewClockCache(rocksdb_block_cache_size)
- : rocksdb::NewLRUCache(rocksdb_block_cache_size);
+ std::shared_ptr<rocksdb::MemoryAllocator> memory_allocator;
+ if (!rocksdb_cache_dump) {
+ size_t block_size = rocksdb_tbl_options->block_size;
+ rocksdb::JemallocAllocatorOptions alloc_opt;
+ // Limit jemalloc tcache memory usage. The range
+ // [block_size/4, block_size] should be enough to cover most of
+ // block cache allocation sizes.
+ alloc_opt.limit_tcache_size = true;
+ alloc_opt.tcache_size_lower_bound = block_size / 4;
+ alloc_opt.tcache_size_upper_bound = block_size;
+ rocksdb::Status new_alloc_status =
+ rocksdb::NewJemallocNodumpAllocator(alloc_opt, &memory_allocator);
+ if (!new_alloc_status.ok()) {
+ // Fallback to use default malloc/free.
+ rdb_log_status_error(new_alloc_status,
+ "Error excluding block cache from core dump");
+ memory_allocator = nullptr;
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+ }
+ std::shared_ptr<rocksdb::Cache> block_cache =
+ rocksdb_use_clock_cache
+ ? rocksdb::NewClockCache(rocksdb_block_cache_size)
+ : rocksdb::NewLRUCache(
+ rocksdb_block_cache_size, -1 /*num_shard_bits*/,
+ false /*strict_capacity_limit*/,
+ rocksdb_cache_high_pri_pool_ratio, memory_allocator);
if (rocksdb_sim_cache_size > 0) {
// Simulated cache enabled
// Wrap block cache inside a simulated cache and pass it to RocksDB
@@ -5065,7 +5501,7 @@ static int rocksdb_init_func(void *const p) {
if (rocksdb_persistent_cache_size_mb > 0) {
std::shared_ptr<rocksdb::PersistentCache> pcache;
- uint64_t cache_size_bytes= rocksdb_persistent_cache_size_mb * 1024 * 1024;
+ uint64_t cache_size_bytes = rocksdb_persistent_cache_size_mb * 1024 * 1024;
status = rocksdb::NewPersistentCache(
rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path),
cache_size_bytes, myrocks_logger, true, &pcache);
@@ -5077,6 +5513,7 @@ static int rocksdb_init_func(void *const p) {
}
rocksdb_tbl_options->persistent_cache = pcache;
} else if (strlen(rocksdb_persistent_cache_path)) {
+ // NO_LINT_DEBUG
sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb");
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -5094,17 +5531,23 @@ static int rocksdb_init_func(void *const p) {
If there are no column families, we're creating the new database.
Create one column family named "default".
*/
- if (cf_names.size() == 0)
- cf_names.push_back(DEFAULT_CF_NAME);
+ if (cf_names.size() == 0) cf_names.push_back(DEFAULT_CF_NAME);
std::vector<int> compaction_enabled_cf_indices;
+
+ // NO_LINT_DEBUG
sql_print_information("RocksDB: Column Families at start:");
for (size_t i = 0; i < cf_names.size(); ++i) {
rocksdb::ColumnFamilyOptions opts;
cf_options_map->get_cf_options(cf_names[i], &opts);
+ // NO_LINT_DEBUG
sql_print_information(" cf=%s", cf_names[i].c_str());
+
+ // NO_LINT_DEBUG
sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size);
+
+ // NO_LINT_DEBUG
sql_print_information(" target_file_size_base=%" PRIu64,
opts.target_file_size_base);
@@ -5185,25 +5628,27 @@ static int rocksdb_init_func(void *const p) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
- auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME
-#ifdef HAVE_PSI_INTERFACE
- ,
- rdb_background_psi_thread_key
+#ifndef HAVE_PSI_INTERFACE
+ auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME);
+#else
+ auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME,
+ rdb_background_psi_thread_key);
#endif
- );
if (err != 0) {
+ // NO_LINT_DEBUG
sql_print_error("RocksDB: Couldn't start the background thread: (errno=%d)",
err);
DBUG_RETURN(HA_EXIT_FAILURE);
}
- err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME
-#ifdef HAVE_PSI_INTERFACE
- ,
- rdb_drop_idx_psi_thread_key
+#ifndef HAVE_PSI_INTERFACE
+ err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME);
+#else
+ err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME,
+ rdb_drop_idx_psi_thread_key);
#endif
- );
if (err != 0) {
+ // NO_LINT_DEBUG
sql_print_error("RocksDB: Couldn't start the drop index thread: (errno=%d)",
err);
DBUG_RETURN(HA_EXIT_FAILURE);
@@ -5220,7 +5665,6 @@ static int rocksdb_init_func(void *const p) {
sql_print_error(
"RocksDB: Couldn't start the manual compaction thread: (errno=%d)",
err);
- rdb_open_tables.free_hash();
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -5254,7 +5698,6 @@ static int rocksdb_init_func(void *const p) {
if (err != 0) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Couldn't initialize error messages");
- rdb_open_tables.m_hash.~Rdb_table_set();
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -5277,13 +5720,17 @@ static int rocksdb_init_func(void *const p) {
}
#if !defined(_WIN32) && !defined(__APPLE__)
- io_watchdog = new Rdb_io_watchdog(directories);
+ io_watchdog = new Rdb_io_watchdog(std::move(directories));
io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs);
#endif
// NO_LINT_DEBUG
- sql_print_information("MyRocks storage engine plugin has been successfully "
- "initialized.");
+ sql_print_information(
+ "MyRocks storage engine plugin has been successfully "
+ "initialized.");
+
+ // Skip cleaning up rdb_open_tables as we've succeeded
+ rdb_open_tables_cleanup.skip();
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -5340,18 +5787,18 @@ static int rocksdb_done_func(void *const p) {
"RocksDB: Couldn't stop the manual compaction thread: (errno=%d)", err);
}
- if (rdb_open_tables.m_hash.size()) {
+ if (rdb_open_tables.count()) {
// Looks like we are getting unloaded and yet we have some open tables
// left behind.
error = 1;
}
+ rdb_open_tables.free();
/*
destructors for static objects can be called at _exit(),
but we want to free the memory at dlclose()
*/
- rdb_open_tables.m_hash.~Rdb_table_set();
- mysql_mutex_destroy(&rdb_open_tables.m_mutex);
+ // MARIADB_MERGE_2019: rdb_open_tables.m_hash.~Rdb_table_set();
mysql_mutex_destroy(&rdb_sysvars_mutex);
mysql_mutex_destroy(&rdb_block_cache_resize_mutex);
@@ -5436,7 +5883,7 @@ static inline void rocksdb_smart_next(bool seek_backward,
}
}
-#ifndef NDEBUG
+#ifndef DBUG_OFF
// simulate that RocksDB has reported corrupted data
static void dbug_change_status_to_corrupted(rocksdb::Status *status) {
*status = rocksdb::Status::Corruption();
@@ -5471,39 +5918,39 @@ static inline bool is_valid(rocksdb::Iterator *scan_it) {
they are needed to function.
*/
-Rdb_table_handler *
-Rdb_open_tables_map::get_table_handler(const char *const table_name) {
+Rdb_table_handler *Rdb_open_tables_map::get_table_handler(
+ const char *const table_name) {
+ DBUG_ASSERT(table_name != nullptr);
+
Rdb_table_handler *table_handler;
- uint length;
- char *tmp_name;
- DBUG_ASSERT(table_name != nullptr);
- length = (uint)strlen(table_name);
+ std::string table_name_str(table_name);
// First, look up the table in the hash map.
RDB_MUTEX_LOCK_CHECK(m_mutex);
- if (!m_hash.size() || !(table_handler = m_hash.find(table_name, length))) {
+ const auto it = m_table_map.find(table_name_str);
+ if (it != m_table_map.end()) {
+ // Found it
+ table_handler = it->second;
+ } else {
+ char *tmp_name;
+
// Since we did not find it in the hash map, attempt to create and add it
// to the hash map.
if (!(table_handler = reinterpret_cast<Rdb_table_handler *>(my_multi_malloc(
MYF(MY_WME | MY_ZEROFILL), &table_handler, sizeof(*table_handler),
- &tmp_name, length + 1, NullS)))) {
+ &tmp_name, table_name_str.length() + 1, NullS)))) {
// Allocating a new Rdb_table_handler and a new table name failed.
RDB_MUTEX_UNLOCK_CHECK(m_mutex);
return nullptr;
}
table_handler->m_ref_count = 0;
- table_handler->m_table_name_length = length;
+ table_handler->m_table_name_length = table_name_str.length();
table_handler->m_table_name = tmp_name;
strmov(table_handler->m_table_name, table_name);
- if (m_hash.insert(table_handler)) {
- // Inserting into the hash map failed.
- RDB_MUTEX_UNLOCK_CHECK(m_mutex);
- my_free(table_handler);
- return nullptr;
- }
+ m_table_map.emplace(table_name_str, table_handler);
thr_lock_init(&table_handler->m_thr_lock);
#ifdef MARIAROCKS_NOT_YET
@@ -5524,16 +5971,15 @@ std::vector<std::string> rdb_get_open_table_names(void) {
}
std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const {
- size_t i;
const Rdb_table_handler *table_handler;
std::vector<std::string> names;
RDB_MUTEX_LOCK_CHECK(m_mutex);
- for (i = 0; (table_handler = m_hash.at(i)); i++) {
+ for (const auto &kv : m_table_map) {
+ table_handler = kv.second;
DBUG_ASSERT(table_handler != nullptr);
names.push_back(table_handler->m_table_name);
}
- DBUG_ASSERT(i == m_hash.size());
RDB_MUTEX_UNLOCK_CHECK(m_mutex);
return names;
@@ -5546,44 +5992,44 @@ std::vector<std::string> Rdb_open_tables_map::get_table_names(void) const {
static ulonglong rdb_get_int_col_max_value(const Field *field) {
ulonglong max_value = 0;
switch (field->key_type()) {
- case HA_KEYTYPE_BINARY:
- max_value = 0xFFULL;
- break;
- case HA_KEYTYPE_INT8:
- max_value = 0x7FULL;
- break;
- case HA_KEYTYPE_USHORT_INT:
- max_value = 0xFFFFULL;
- break;
- case HA_KEYTYPE_SHORT_INT:
- max_value = 0x7FFFULL;
- break;
- case HA_KEYTYPE_UINT24:
- max_value = 0xFFFFFFULL;
- break;
- case HA_KEYTYPE_INT24:
- max_value = 0x7FFFFFULL;
- break;
- case HA_KEYTYPE_ULONG_INT:
- max_value = 0xFFFFFFFFULL;
- break;
- case HA_KEYTYPE_LONG_INT:
- max_value = 0x7FFFFFFFULL;
- break;
- case HA_KEYTYPE_ULONGLONG:
- max_value = 0xFFFFFFFFFFFFFFFFULL;
- break;
- case HA_KEYTYPE_LONGLONG:
- max_value = 0x7FFFFFFFFFFFFFFFULL;
- break;
- case HA_KEYTYPE_FLOAT:
- max_value = 0x1000000ULL;
- break;
- case HA_KEYTYPE_DOUBLE:
- max_value = 0x20000000000000ULL;
- break;
- default:
- abort();
+ case HA_KEYTYPE_BINARY:
+ max_value = 0xFFULL;
+ break;
+ case HA_KEYTYPE_INT8:
+ max_value = 0x7FULL;
+ break;
+ case HA_KEYTYPE_USHORT_INT:
+ max_value = 0xFFFFULL;
+ break;
+ case HA_KEYTYPE_SHORT_INT:
+ max_value = 0x7FFFULL;
+ break;
+ case HA_KEYTYPE_UINT24:
+ max_value = 0xFFFFFFULL;
+ break;
+ case HA_KEYTYPE_INT24:
+ max_value = 0x7FFFFFULL;
+ break;
+ case HA_KEYTYPE_ULONG_INT:
+ max_value = 0xFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONG_INT:
+ max_value = 0x7FFFFFFFULL;
+ break;
+ case HA_KEYTYPE_ULONGLONG:
+ max_value = 0xFFFFFFFFFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_LONGLONG:
+ max_value = 0x7FFFFFFFFFFFFFFFULL;
+ break;
+ case HA_KEYTYPE_FLOAT:
+ max_value = 0x1000000ULL;
+ break;
+ case HA_KEYTYPE_DOUBLE:
+ max_value = 0x20000000000000ULL;
+ break;
+ default:
+ abort();
}
return max_value;
@@ -5592,7 +6038,7 @@ static ulonglong rdb_get_int_col_max_value(const Field *field) {
void ha_rocksdb::load_auto_incr_value() {
ulonglong auto_incr = 0;
bool validate_last = false, use_datadic = true;
-#ifndef NDEBUG
+#ifndef DBUG_OFF
DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;);
validate_last = true;
#endif
@@ -5639,7 +6085,7 @@ ulonglong ha_rocksdb::load_auto_incr_value_from_index() {
// appropriately and non-index-only lookup will not read the value)
const bool save_keyread_only = m_keyread_only;
m_keyread_only = true;
- m_key_requested = true;
+ m_converter->set_is_key_requested(true);
if (!index_last(table->record[0])) {
Field *field =
@@ -5651,7 +6097,7 @@ ulonglong ha_rocksdb::load_auto_incr_value_from_index() {
if (last_val != max_val) {
last_val++;
}
-#ifndef NDEBUG
+#ifndef DBUG_OFF
ulonglong dd_val;
if (last_val <= max_val) {
const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id();
@@ -5776,8 +6222,9 @@ int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) {
// Get hidden primary key from old key slice
Rdb_string_reader reader(&rowkey_slice);
- if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE)))
+ if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
const int length= 8; /* was Field_longlong::PACK_LENGTH in FB MySQL tree */
const uchar *from = reinterpret_cast<const uchar *>(reader.read(length));
@@ -5805,8 +6252,9 @@ void Rdb_open_tables_map::release_table_handler(
DBUG_ASSERT(table_handler->m_ref_count > 0);
if (!--table_handler->m_ref_count) {
// Last reference was released. Tear down the hash entry.
- const auto ret MY_ATTRIBUTE((__unused__)) = m_hash.remove(table_handler);
- DBUG_ASSERT(!ret); // the hash entry must actually be found and deleted
+ const auto ret MY_ATTRIBUTE((__unused__)) =
+ m_table_map.erase(std::string(table_handler->m_table_name));
+ DBUG_ASSERT(ret == 1); // the hash entry must actually be found and deleted
my_core::thr_lock_delete(&table_handler->m_thr_lock);
my_free(table_handler);
}
@@ -5822,19 +6270,34 @@ static handler *rocksdb_create_handler(my_core::handlerton *const hton,
ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton,
my_core::TABLE_SHARE *const table_arg)
- : handler(hton, table_arg), m_table_handler(nullptr), m_scan_it(nullptr),
- m_scan_it_skips_bloom(false), m_scan_it_snapshot(nullptr),
- m_scan_it_lower_bound(nullptr), m_scan_it_upper_bound(nullptr),
- m_tbl_def(nullptr), m_pk_descr(nullptr), m_key_descr_arr(nullptr),
- m_pk_can_be_decoded(false), m_maybe_unpack_info(false),
- m_pk_tuple(nullptr), m_pk_packed_tuple(nullptr),
- m_sk_packed_tuple(nullptr), m_end_key_packed_tuple(nullptr),
- m_sk_match_prefix(nullptr), m_sk_match_prefix_buf(nullptr),
- m_sk_packed_tuple_old(nullptr), m_dup_sk_packed_tuple(nullptr),
- m_dup_sk_packed_tuple_old(nullptr), m_pack_buffer(nullptr),
- m_lock_rows(RDB_LOCK_NONE), m_keyread_only(FALSE), m_encoder_arr(nullptr),
- m_row_checksums_checked(0), m_in_rpl_delete_rows(false),
- m_in_rpl_update_rows(false), m_force_skip_unique_check(false) {}
+ : handler(hton, table_arg),
+ m_table_handler(nullptr),
+ m_scan_it(nullptr),
+ m_scan_it_skips_bloom(false),
+ m_scan_it_snapshot(nullptr),
+ m_scan_it_lower_bound(nullptr),
+ m_scan_it_upper_bound(nullptr),
+ m_tbl_def(nullptr),
+ m_pk_descr(nullptr),
+ m_key_descr_arr(nullptr),
+ m_pk_can_be_decoded(false),
+ m_pk_tuple(nullptr),
+ m_pk_packed_tuple(nullptr),
+ m_sk_packed_tuple(nullptr),
+ m_end_key_packed_tuple(nullptr),
+ m_sk_match_prefix(nullptr),
+ m_sk_match_prefix_buf(nullptr),
+ m_sk_packed_tuple_old(nullptr),
+ m_dup_sk_packed_tuple(nullptr),
+ m_dup_sk_packed_tuple_old(nullptr),
+ m_pack_buffer(nullptr),
+ m_lock_rows(RDB_LOCK_NONE),
+ m_keyread_only(false),
+ m_insert_with_update(false),
+ m_dup_pk_found(false),
+ m_in_rpl_delete_rows(false),
+ m_in_rpl_update_rows(false),
+ m_force_skip_unique_check(false) {}
const std::string &ha_rocksdb::get_table_basename() const {
@@ -5853,9 +6316,9 @@ bool ha_rocksdb::init_with_fields() {
if (pk != MAX_KEY) {
const uint key_parts = table_share->key_info[pk].user_defined_key_parts;
check_keyread_allowed(pk /*PK*/, key_parts - 1, true);
- } else
+ } else {
m_pk_can_be_decoded = false;
-
+ }
cached_table_flags = table_flags();
DBUG_RETURN(false); /* Ok */
@@ -5912,298 +6375,52 @@ bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd,
RDB_MAX_HEXDUMP_LEN);
const GL_INDEX_ID gl_index_id = kd.get_gl_index_id();
// NO_LINT_DEBUG
- sql_print_error("Decoding ttl from PK value failed, "
- "for index (%u,%u), val: %s",
- gl_index_id.cf_id, gl_index_id.index_id, buf.c_str());
+ sql_print_error(
+ "Decoding ttl from PK value failed, "
+ "for index (%u,%u), val: %s",
+ gl_index_id.cf_id, gl_index_id.index_id, buf.c_str());
DBUG_ASSERT(0);
return false;
}
/* Hide record if it has expired before the current snapshot time. */
uint64 read_filter_ts = 0;
-#ifndef NDEBUG
+#ifndef DBUG_OFF
read_filter_ts += rdb_dbug_set_ttl_read_filter_ts();
#endif
bool is_hide_ttl =
ts + kd.m_ttl_duration + read_filter_ts <= static_cast<uint64>(curr_ts);
if (is_hide_ttl) {
update_row_stats(ROWS_FILTERED);
+
+ /* increment examined row count when rows are skipped */
+ THD *thd = ha_thd();
+ thd->inc_examined_row_count(1);
+ DEBUG_SYNC(thd, "rocksdb.ttl_rows_examined");
}
return is_hide_ttl;
}
-void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
- rocksdb::Iterator *const iter,
- bool seek_backward) {
+int ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward) {
if (kd.has_ttl()) {
+ THD *thd = ha_thd();
while (iter->Valid() &&
should_hide_ttl_rec(
kd, iter->value(),
get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
- rocksdb_smart_next(seek_backward, iter);
- }
- }
-}
-
-/**
- Convert record from table->record[0] form into a form that can be written
- into rocksdb.
-
- @param pk_packed_slice Packed PK tuple. We need it in order to compute
- and store its CRC.
- @param packed_rec OUT Data slice with record data.
-*/
-
-int ha_rocksdb::convert_record_to_storage_format(
- const struct update_row_info &row_info, rocksdb::Slice *const packed_rec) {
- DBUG_ASSERT_IMP(m_maybe_unpack_info, row_info.new_pk_unpack_info);
- DBUG_ASSERT(m_pk_descr != nullptr);
-
- const rocksdb::Slice &pk_packed_slice = row_info.new_pk_slice;
- Rdb_string_writer *const pk_unpack_info = row_info.new_pk_unpack_info;
- bool has_ttl = m_pk_descr->has_ttl();
- bool has_ttl_column = !m_pk_descr->m_ttl_column.empty();
- bool ttl_in_pk = has_ttl_column && (row_info.ttl_pk_offset != UINT_MAX);
-
- m_storage_record.length(0);
-
- if (has_ttl) {
- /* If it's a TTL record, reserve space for 8 byte TTL value in front. */
- m_storage_record.fill(ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_in_rec, 0);
- m_ttl_bytes_updated = false;
-
- /*
- If the TTL is contained within the key, we use the offset to find the
- TTL value and place it in the beginning of the value record.
- */
- if (ttl_in_pk) {
- Rdb_string_reader reader(&pk_packed_slice);
- const char *ts;
- if (!reader.read(row_info.ttl_pk_offset) ||
- !(ts = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) {
- std::string buf;
- buf = rdb_hexdump(pk_packed_slice.data(), pk_packed_slice.size(),
- RDB_MAX_HEXDUMP_LEN);
- const GL_INDEX_ID gl_index_id = m_pk_descr->get_gl_index_id();
- // NO_LINT_DEBUG
- sql_print_error("Decoding ttl from PK failed during insert, "
- "for index (%u,%u), key: %s",
- gl_index_id.cf_id, gl_index_id.index_id, buf.c_str());
- return HA_EXIT_FAILURE;
- }
-
- char *const data = const_cast<char *>(m_storage_record.ptr());
- memcpy(data, ts, ROCKSDB_SIZEOF_TTL_RECORD);
-#ifndef NDEBUG
- // Adjust for test case if needed
- rdb_netbuf_store_uint64(
- reinterpret_cast<uchar *>(data),
- rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(data)) +
- rdb_dbug_set_ttl_rec_ts());
-#endif
- // Also store in m_ttl_bytes to propagate to update_sk
- memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
- } else if (!has_ttl_column) {
- /*
- For implicitly generated TTL records we need to copy over the old
- TTL value from the old record in the event of an update. It was stored
- in m_ttl_bytes.
-
- Otherwise, generate a timestamp using the current time.
- */
- if (!row_info.old_pk_slice.empty()) {
- char *const data = const_cast<char *>(m_storage_record.ptr());
- memcpy(data, m_ttl_bytes, sizeof(uint64));
- } else {
- uint64 ts = static_cast<uint64>(std::time(nullptr));
-#ifndef NDEBUG
- ts += rdb_dbug_set_ttl_rec_ts();
-#endif
- char *const data = const_cast<char *>(m_storage_record.ptr());
- rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
- // Also store in m_ttl_bytes to propagate to update_sk
- memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
- }
- }
- } else {
- /* All NULL bits are initially 0 */
- m_storage_record.fill(m_null_bytes_in_rec, 0);
- }
-
- // If a primary key may have non-empty unpack_info for certain values,
- // (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block
- // itself was prepared in Rdb_key_def::pack_record.
- if (m_maybe_unpack_info) {
- m_storage_record.append(reinterpret_cast<char *>(pk_unpack_info->ptr()),
- pk_unpack_info->get_current_pos());
- }
-
- for (uint i = 0; i < table->s->fields; i++) {
- /* Don't pack decodable PK key parts */
- if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) {
- continue;
- }
-
- Field *const field = table->field[i];
- if (m_encoder_arr[i].maybe_null()) {
- char *data = const_cast<char *>(m_storage_record.ptr());
- if (has_ttl) {
- data += ROCKSDB_SIZEOF_TTL_RECORD;
- }
-
- if (field->is_null()) {
- data[m_encoder_arr[i].m_null_offset] |= m_encoder_arr[i].m_null_mask;
- /* Don't write anything for NULL values */
- continue;
- }
- }
-
- if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_BLOB) {
- my_core::Field_blob *blob = (my_core::Field_blob *)field;
- /* Get the number of bytes needed to store length*/
- const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr;
-
- /* Store the length of the value */
- m_storage_record.append(reinterpret_cast<char *>(blob->ptr),
- length_bytes);
-
- /* Store the blob value itself */
- char *data_ptr;
- memcpy(&data_ptr, blob->ptr + length_bytes, sizeof(uchar **));
- m_storage_record.append(data_ptr, blob->get_length());
- } else if (m_encoder_arr[i].m_field_type == MYSQL_TYPE_VARCHAR) {
- Field_varstring *const field_var = (Field_varstring *)field;
- uint data_len;
- /* field_var->length_bytes is 1 or 2 */
- if (field_var->length_bytes == 1) {
- data_len = field_var->ptr[0];
- } else {
- DBUG_ASSERT(field_var->length_bytes == 2);
- data_len = uint2korr(field_var->ptr);
- }
- m_storage_record.append(reinterpret_cast<char *>(field_var->ptr),
- field_var->length_bytes + data_len);
- } else {
- /* Copy the field data */
- const uint len = field->pack_length_in_rec();
- m_storage_record.append(reinterpret_cast<char *>(field->ptr), len);
-
- /*
- Check if this is the TTL field within the table, if so store the TTL
- in the front of the record as well here.
- */
- if (has_ttl && has_ttl_column &&
- i == m_pk_descr->get_ttl_field_offset()) {
- DBUG_ASSERT(len == ROCKSDB_SIZEOF_TTL_RECORD);
- DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
- DBUG_ASSERT(m_pk_descr->get_ttl_field_offset() != UINT_MAX);
-
- char *const data = const_cast<char *>(m_storage_record.ptr());
- uint64 ts = uint8korr(field->ptr);
-#ifndef NDEBUG
- ts += rdb_dbug_set_ttl_rec_ts();
-#endif
- rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
-
- // If this is an update and the timestamp has been updated, take note
- // so we can avoid updating SKs unnecessarily.
- if (!row_info.old_pk_slice.empty()) {
- m_ttl_bytes_updated =
- memcmp(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
- }
- // Store timestamp in m_ttl_bytes to propagate to update_sk
- memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ DEBUG_SYNC(thd, "rocksdb.check_flags_ser");
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
}
+ rocksdb_smart_next(seek_backward, iter);
}
}
-
- if (should_store_row_debug_checksums()) {
- const uint32_t key_crc32 = my_core::crc32(
- 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size());
- const uint32_t val_crc32 =
- my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record),
- m_storage_record.length());
- uchar key_crc_buf[RDB_CHECKSUM_SIZE];
- uchar val_crc_buf[RDB_CHECKSUM_SIZE];
- rdb_netbuf_store_uint32(key_crc_buf, key_crc32);
- rdb_netbuf_store_uint32(val_crc_buf, val_crc32);
- m_storage_record.append((const char *)&RDB_CHECKSUM_DATA_TAG, 1);
- m_storage_record.append((const char *)key_crc_buf, RDB_CHECKSUM_SIZE);
- m_storage_record.append((const char *)val_crc_buf, RDB_CHECKSUM_SIZE);
- }
-
- *packed_rec =
- rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length());
-
return HA_EXIT_SUCCESS;
}
-/*
- @brief
- Setup which fields will be unpacked when reading rows
-
- @detail
- Three special cases when we still unpack all fields:
- - When this table is being updated (m_lock_rows==RDB_LOCK_WRITE).
- - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to
- read all fields to find whether there is a row checksum at the end. We could
- skip the fields instead of decoding them, but currently we do decoding.)
- - On index merge as bitmap is cleared during that operation
-
- @seealso
- ha_rocksdb::setup_field_converters()
- ha_rocksdb::convert_record_from_storage_format()
-*/
-void ha_rocksdb::setup_read_decoders() {
- m_decoders_vect.clear();
- m_key_requested = false;
-
- int last_useful = 0;
- int skip_size = 0;
-
- for (uint i = 0; i < table->s->fields; i++) {
- // bitmap is cleared on index merge, but it still needs to decode columns
- const bool field_requested =
- m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums ||
- bitmap_is_clear_all(table->read_set) ||
- bitmap_is_set(table->read_set, table->field[i]->field_index);
-
- // We only need the decoder if the whole record is stored.
- if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) {
- // the field potentially needs unpacking
- if (field_requested) {
- // the field is in the read set
- m_key_requested = true;
- }
- continue;
- }
-
- if (field_requested) {
- // We will need to decode this field
- m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size});
- last_useful = m_decoders_vect.size();
- skip_size = 0;
- } else {
- if (m_encoder_arr[i].uses_variable_len_encoding() ||
- m_encoder_arr[i].maybe_null()) {
- // For variable-length field, we need to read the data and skip it
- m_decoders_vect.push_back({&m_encoder_arr[i], false, skip_size});
- skip_size = 0;
- } else {
- // Fixed-width field can be skipped without looking at it.
- // Add appropriate skip_size to the next field.
- skip_size += m_encoder_arr[i].m_pack_length_in_rec;
- }
- }
- }
-
- // It could be that the last few elements are varchars that just do
- // skipping. Remove them.
- m_decoders_vect.erase(m_decoders_vect.begin() + last_useful,
- m_decoders_vect.end());
-}
-
-#ifndef NDEBUG
+#ifndef DBUG_OFF
void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) {
std::string str(on_disk_rec->data(), on_disk_rec->size());
on_disk_rec->Reset();
@@ -6228,17 +6445,6 @@ void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) {
on_disk_rec->PinSelf(rocksdb::Slice(res));
}
-void dbug_modify_key_varchar8(String &on_disk_rec) {
- std::string res;
- // The key starts with index number
- res.append(on_disk_rec.ptr(), Rdb_key_def::INDEX_NUMBER_SIZE);
-
- // Then, a mem-comparable form of a varchar(8) value.
- res.append("ABCDE\0\0\0\xFC", 9);
- on_disk_rec.length(0);
- on_disk_rec.append(res.data(), res.size());
-}
-
void dbug_create_err_inplace_alter() {
my_printf_error(ER_UNKNOWN_ERROR,
"Intentional failure in inplace alter occurred.", MYF(0));
@@ -6247,7 +6453,6 @@ void dbug_create_err_inplace_alter() {
int ha_rocksdb::convert_record_from_storage_format(
const rocksdb::Slice *const key, uchar *const buf) {
-
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1",
dbug_append_garbage_at_end(&m_retrieved_record););
DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2",
@@ -6258,91 +6463,6 @@ int ha_rocksdb::convert_record_from_storage_format(
return convert_record_from_storage_format(key, &m_retrieved_record, buf);
}
-int ha_rocksdb::convert_blob_from_storage_format(
- my_core::Field_blob *const blob,
- Rdb_string_reader *const reader,
- bool decode)
-{
- /* Get the number of bytes needed to store length*/
- const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr;
-
- const char *data_len_str;
- if (!(data_len_str = reader->read(length_bytes))) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- memcpy(blob->ptr, data_len_str, length_bytes);
-
- const uint32 data_len = blob->get_length(
- reinterpret_cast<const uchar*>(data_len_str), length_bytes);
- const char *blob_ptr;
- if (!(blob_ptr = reader->read(data_len))) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- if (decode) {
- // set 8-byte pointer to 0, like innodb does (relevant for 32-bit
- // platforms)
- memset(blob->ptr + length_bytes, 0, 8);
- memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar **));
- }
-
- return HA_EXIT_SUCCESS;
-}
-
-int ha_rocksdb::convert_varchar_from_storage_format(
- my_core::Field_varstring *const field_var,
- Rdb_string_reader *const reader,
- bool decode)
-{
- const char *data_len_str;
- if (!(data_len_str = reader->read(field_var->length_bytes)))
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
-
- uint data_len;
- /* field_var->length_bytes is 1 or 2 */
- if (field_var->length_bytes == 1) {
- data_len = (uchar)data_len_str[0];
- } else {
- DBUG_ASSERT(field_var->length_bytes == 2);
- data_len = uint2korr(data_len_str);
- }
-
- if (data_len > field_var->field_length) {
- /* The data on disk is longer than table DDL allows? */
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- if (!reader->read(data_len)) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- if (decode) {
- memcpy(field_var->ptr, data_len_str, field_var->length_bytes + data_len);
- }
-
- return HA_EXIT_SUCCESS;
-}
-
-int ha_rocksdb::convert_field_from_storage_format(
- my_core::Field *const field,
- Rdb_string_reader *const reader,
- bool decode,
- uint len)
-{
- const char *data_bytes;
- if (len > 0) {
- if ((data_bytes = reader->read(len)) == nullptr) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- if (decode)
- memcpy(field->ptr, data_bytes, len);
- }
-
- return HA_EXIT_SUCCESS;
-}
-
/*
@brief
Unpack the record in this->m_retrieved_record and this->m_last_rowkey from
@@ -6359,8 +6479,8 @@ int ha_rocksdb::convert_field_from_storage_format(
m_retrieved_record).
@seealso
- ha_rocksdb::setup_read_decoders() Sets up data structures which tell which
- columns to decode.
+ rdb_converter::setup_read_decoders() Sets up data structures which tell
+ which columns to decode.
@return
0 OK
@@ -6370,241 +6490,7 @@ int ha_rocksdb::convert_field_from_storage_format(
int ha_rocksdb::convert_record_from_storage_format(
const rocksdb::Slice *const key, const rocksdb::Slice *const value,
uchar *const buf) {
- Rdb_string_reader reader(value);
-
- /*
- Decode PK fields from the key
- */
- DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_read1",
- dbug_modify_key_varchar8(m_last_rowkey););
-
- const rocksdb::Slice rowkey_slice(m_last_rowkey.ptr(),
- m_last_rowkey.length());
- const char *unpack_info = nullptr;
- uint16 unpack_info_len = 0;
- rocksdb::Slice unpack_slice;
-
- /* If it's a TTL record, skip the 8 byte TTL value */
- const char *ttl_bytes;
- if (m_pk_descr->has_ttl()) {
- if ((ttl_bytes = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) {
- memcpy(m_ttl_bytes, ttl_bytes, ROCKSDB_SIZEOF_TTL_RECORD);
- } else {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
- }
-
- /* Other fields are decoded from the value */
- const char *null_bytes = nullptr;
- if (m_null_bytes_in_rec && !(null_bytes = reader.read(m_null_bytes_in_rec))) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- if (m_maybe_unpack_info) {
- unpack_info = reader.get_current_ptr();
- if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) ||
- !reader.read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- unpack_info_len =
- rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1));
- unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len);
-
- reader.read(unpack_info_len -
- Rdb_key_def::get_unpack_header_size(unpack_info[0]));
- }
-
- int err = HA_EXIT_SUCCESS;
- if (m_key_requested) {
- err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
- unpack_info ? &unpack_slice : nullptr,
- false /* verify_checksum */);
- }
-
- if (err != HA_EXIT_SUCCESS) {
- return err;
- }
-
- for (auto it = m_decoders_vect.begin(); it != m_decoders_vect.end(); it++) {
- const Rdb_field_encoder *const field_dec = it->m_field_enc;
- const bool decode = it->m_decode;
- const bool isNull =
- field_dec->maybe_null() &&
- ((null_bytes[field_dec->m_null_offset] & field_dec->m_null_mask) != 0);
-
- Field *const field = table->field[field_dec->m_field_index];
-
- /* Skip the bytes we need to skip */
- if (it->m_skip && !reader.read(it->m_skip)) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- uint field_offset = field->ptr - table->record[0];
- uint null_offset = field->null_offset();
- bool maybe_null = field->real_maybe_null();
- field->move_field(buf + field_offset,
- maybe_null ? buf + null_offset : nullptr,
- field->null_bit);
- // WARNING! - Don't return before restoring field->ptr and field->null_ptr!
-
- if (isNull) {
- if (decode) {
- /* This sets the NULL-bit of this record */
- field->set_null();
- /*
- Besides that, set the field value to default value. CHECKSUM TABLE
- depends on this.
- */
- memcpy(field->ptr, table->s->default_values + field_offset,
- field->pack_length());
- }
- } else {
- if (decode) {
- field->set_notnull();
- }
-
- if (field_dec->m_field_type == MYSQL_TYPE_BLOB) {
- err = convert_blob_from_storage_format(
- (my_core::Field_blob *) field, &reader, decode);
- } else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) {
- err = convert_varchar_from_storage_format(
- (my_core::Field_varstring *) field, &reader, decode);
- } else {
- err = convert_field_from_storage_format(
- field, &reader, decode, field_dec->m_pack_length_in_rec);
- }
- }
-
- // Restore field->ptr and field->null_ptr
- field->move_field(table->record[0] + field_offset,
- maybe_null ? table->record[0] + null_offset : nullptr,
- field->null_bit);
-
- if (err != HA_EXIT_SUCCESS) {
- return err;
- }
- }
-
- if (m_verify_row_debug_checksums) {
- if (reader.remaining_bytes() == RDB_CHECKSUM_CHUNK_SIZE &&
- reader.read(1)[0] == RDB_CHECKSUM_DATA_TAG) {
- uint32_t stored_key_chksum =
- rdb_netbuf_to_uint32((const uchar *)reader.read(RDB_CHECKSUM_SIZE));
- uint32_t stored_val_chksum =
- rdb_netbuf_to_uint32((const uchar *)reader.read(RDB_CHECKSUM_SIZE));
-
- const uint32_t computed_key_chksum =
- my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size());
- const uint32_t computed_val_chksum =
- my_core::crc32(0, rdb_slice_to_uchar_ptr(value),
- value->size() - RDB_CHECKSUM_CHUNK_SIZE);
-
- DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1",
- stored_key_chksum++;);
-
- if (stored_key_chksum != computed_key_chksum) {
- m_pk_descr->report_checksum_mismatch(true, key->data(), key->size());
- return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
- }
-
- DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2",
- stored_val_chksum++;);
- if (stored_val_chksum != computed_val_chksum) {
- m_pk_descr->report_checksum_mismatch(false, value->data(),
- value->size());
- return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
- }
-
- m_row_checksums_checked++;
- }
- if (reader.remaining_bytes())
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
-
- return HA_EXIT_SUCCESS;
-}
-
-void ha_rocksdb::get_storage_type(Rdb_field_encoder *const encoder,
- const uint &kp) {
- // STORE_SOME uses unpack_info.
- if (m_pk_descr->has_unpack_info(kp)) {
- DBUG_ASSERT(m_pk_descr->can_unpack(kp));
- encoder->m_storage_type = Rdb_field_encoder::STORE_SOME;
- m_maybe_unpack_info = true;
- } else if (m_pk_descr->can_unpack(kp)) {
- encoder->m_storage_type = Rdb_field_encoder::STORE_NONE;
- }
-}
-
-/*
- Setup data needed to convert table->record[] to and from record storage
- format.
-
- @seealso
- ha_rocksdb::convert_record_to_storage_format,
- ha_rocksdb::convert_record_from_storage_format
-*/
-
-void ha_rocksdb::setup_field_converters() {
- uint i;
- uint null_bytes = 0;
- uchar cur_null_mask = 0x1;
-
- DBUG_ASSERT(m_encoder_arr == nullptr);
- m_encoder_arr = static_cast<Rdb_field_encoder *>(
- my_malloc(table->s->fields * sizeof(Rdb_field_encoder), MYF(0)));
- if (m_encoder_arr == nullptr) {
- return;
- }
-
- for (i = 0; i < table->s->fields; i++) {
- Field *const field = table->field[i];
- m_encoder_arr[i].m_storage_type = Rdb_field_encoder::STORE_ALL;
-
- /*
- Check if this field is
- - a part of primary key, and
- - it can be decoded back from its key image.
- If both hold, we don't need to store this field in the value part of
- RocksDB's key-value pair.
-
- If hidden pk exists, we skip this check since the field will never be
- part of the hidden pk.
- */
- if (!has_hidden_pk(table)) {
- KEY *const pk_info = &table->key_info[table->s->primary_key];
- for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) {
- /* key_part->fieldnr is counted from 1 */
- if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) {
- get_storage_type(&m_encoder_arr[i], kp);
- break;
- }
- }
- }
-
- m_encoder_arr[i].m_field_type = field->real_type();
- m_encoder_arr[i].m_field_index = i;
- m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec();
-
- if (field->real_maybe_null()) {
- m_encoder_arr[i].m_null_mask = cur_null_mask;
- m_encoder_arr[i].m_null_offset = null_bytes;
- if (cur_null_mask == 0x80) {
- cur_null_mask = 0x1;
- null_bytes++;
- } else
- cur_null_mask = cur_null_mask << 1;
- } else {
- m_encoder_arr[i].m_null_mask = 0;
- }
- }
-
- /* Count the last, unfinished NULL-bits byte */
- if (cur_null_mask != 0x1)
- null_bytes++;
-
- m_null_bytes_in_rec = null_bytes;
+ return m_converter->decode(m_pk_descr, buf, key, value);
}
int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
@@ -6641,8 +6527,8 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
/* Sometimes, we may use m_sk_packed_tuple for storing packed PK */
max_packed_sk_len = pack_key_len;
for (uint i = 0; i < table_arg->s->keys; i++) {
- if (i == table_arg->s->primary_key) /* Primary key was processed above */
- continue;
+ /* Primary key was processed above */
+ if (i == table_arg->s->primary_key) continue;
// TODO: move this into get_table_handler() ??
kd_arr[i]->setup(table_arg, tbl_def_arg);
@@ -6731,25 +6617,10 @@ void ha_rocksdb::free_key_buffers() {
}
#ifdef MARIAROCKS_NOT_YET
-void ha_rocksdb::set_use_read_free_rpl(const char *const whitelist) {
- DBUG_ASSERT(whitelist != nullptr);
-
-#if defined(HAVE_PSI_INTERFACE)
- Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables);
-#else
- Regex_list_handler regex_handler;
-#endif
-
- if (!regex_handler.set_patterns(whitelist)) {
- warn_about_bad_patterns(&regex_handler, "read_free_rpl_tables");
- }
-
- m_use_read_free_rpl = regex_handler.matches(m_tbl_def->base_tablename());
-}
#endif
-
void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) {
- DBUG_ASSERT(whitelist != nullptr);
+ const char *const wl =
+ whitelist ? whitelist : DEFAULT_SKIP_UNIQUE_CHECK_TABLES;
#if defined(HAVE_PSI_INTERFACE)
Regex_list_handler regex_handler(key_rwlock_skip_unique_check_tables);
@@ -6757,7 +6628,7 @@ void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) {
Regex_list_handler regex_handler;
#endif
- if (!regex_handler.set_patterns(whitelist)) {
+ if (!regex_handler.set_patterns(wl)) {
warn_about_bad_patterns(&regex_handler, "skip_unique_check_tables");
}
@@ -6804,8 +6675,8 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
"dictionary");
DBUG_RETURN(HA_ERR_ROCKSDB_INVALID_TABLE);
}
- m_lock_rows = RDB_LOCK_NONE;
+ m_lock_rows = RDB_LOCK_NONE;
m_key_descr_arr = m_tbl_def->m_key_descr_arr;
/*
@@ -6834,7 +6705,15 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
*/
init_with_fields();
- setup_field_converters();
+ /* Initialize decoder */
+ m_converter = std::make_shared<Rdb_converter>(ha_thd(), m_tbl_def, table);
+
+ /*
+ Update m_ttl_bytes address to same as Rdb_converter's m_ttl_bytes.
+ Remove this code after moving convert_record_to_storage_format() into
+ Rdb_converter class.
+ */
+ m_ttl_bytes = m_converter->get_ttl_bytes_buffer();
/*
MariaDB: adjust field->part_of_key for PK columns. We can only do it here
@@ -6877,11 +6756,10 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
The following load_XXX code calls row decode functions, and they do
that without having done ::external_lock() or index_init()/rnd_init().
(Note: this also means we're doing a read when there was no
- setup_field_converters() call)
+ rdb_converter::setup_field_encoders() call)
Initialize the necessary variables for them:
*/
- m_verify_row_debug_checksums = false;
/* Load auto_increment value only once on first use. */
if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) {
@@ -6899,10 +6777,7 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
stats.block_size = rocksdb_tbl_options->block_size;
#ifdef MARIAROCKS_NOT_YET // MDEV-10976
- /* Determine at open whether we can use Read Free Replication or not */
- set_use_read_free_rpl(THDVAR(ha_thd(), read_free_rpl_tables));
#endif
-
/* Determine at open whether we should skip unique checks for this table */
set_skip_unique_check_tables(THDVAR(ha_thd(), skip_unique_check_tables));
@@ -6914,19 +6789,15 @@ int ha_rocksdb::close(void) {
m_pk_descr = nullptr;
m_key_descr_arr = nullptr;
-
+ m_converter = nullptr;
free_key_buffers();
- my_free(m_encoder_arr);
- m_encoder_arr = nullptr;
-
if (m_table_handler != nullptr) {
rdb_open_tables.release_table_handler(m_table_handler);
m_table_handler = nullptr;
}
// These are needed to suppress valgrind errors in rocksdb.partition
- m_storage_record.free();
m_last_rowkey.free();
m_sk_tails.free();
m_sk_tails_old.free();
@@ -7017,61 +6888,72 @@ int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s,
int err;
switch (s.code()) {
- case rocksdb::Status::Code::kOk:
- err = HA_EXIT_SUCCESS;
- break;
- case rocksdb::Status::Code::kNotFound:
- err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND;
- break;
- case rocksdb::Status::Code::kCorruption:
- err = HA_ERR_ROCKSDB_STATUS_CORRUPTION;
- break;
- case rocksdb::Status::Code::kNotSupported:
- err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED;
- break;
- case rocksdb::Status::Code::kInvalidArgument:
- err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT;
- break;
- case rocksdb::Status::Code::kIOError:
- err = (s.IsNoSpace()) ? HA_ERR_ROCKSDB_STATUS_NO_SPACE
- : HA_ERR_ROCKSDB_STATUS_IO_ERROR;
- break;
- case rocksdb::Status::Code::kMergeInProgress:
- err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS;
- break;
- case rocksdb::Status::Code::kIncomplete:
- err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE;
- break;
- case rocksdb::Status::Code::kShutdownInProgress:
- err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS;
- break;
- case rocksdb::Status::Code::kTimedOut:
- err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT;
- break;
- case rocksdb::Status::Code::kAborted:
- err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT
- : HA_ERR_ROCKSDB_STATUS_ABORTED;
- break;
- case rocksdb::Status::Code::kBusy:
- err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK
- : HA_ERR_ROCKSDB_STATUS_BUSY;
- break;
- case rocksdb::Status::Code::kExpired:
- err = HA_ERR_ROCKSDB_STATUS_EXPIRED;
- break;
- case rocksdb::Status::Code::kTryAgain:
- err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN;
- break;
- default:
- DBUG_ASSERT(0);
- return -1;
+ case rocksdb::Status::Code::kOk:
+ err = HA_EXIT_SUCCESS;
+ break;
+ case rocksdb::Status::Code::kNotFound:
+ err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND;
+ break;
+ case rocksdb::Status::Code::kCorruption:
+ err = HA_ERR_ROCKSDB_STATUS_CORRUPTION;
+ break;
+ case rocksdb::Status::Code::kNotSupported:
+ err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED;
+ break;
+ case rocksdb::Status::Code::kInvalidArgument:
+ err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT;
+ break;
+ case rocksdb::Status::Code::kIOError:
+ err = (s.IsNoSpace()) ? HA_ERR_ROCKSDB_STATUS_NO_SPACE
+ : HA_ERR_ROCKSDB_STATUS_IO_ERROR;
+ break;
+ case rocksdb::Status::Code::kMergeInProgress:
+ err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS;
+ break;
+ case rocksdb::Status::Code::kIncomplete:
+ err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE;
+ break;
+ case rocksdb::Status::Code::kShutdownInProgress:
+ err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS;
+ break;
+ case rocksdb::Status::Code::kTimedOut:
+ err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT;
+ break;
+ case rocksdb::Status::Code::kAborted:
+ err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT
+ : HA_ERR_ROCKSDB_STATUS_ABORTED;
+ break;
+ case rocksdb::Status::Code::kBusy:
+ err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK
+ : HA_ERR_ROCKSDB_STATUS_BUSY;
+ break;
+ case rocksdb::Status::Code::kExpired:
+ err = HA_ERR_ROCKSDB_STATUS_EXPIRED;
+ break;
+ case rocksdb::Status::Code::kTryAgain:
+ err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN;
+ break;
+ default:
+ DBUG_ASSERT(0);
+ return -1;
+ }
+
+ std::string errMsg;
+ if (s.IsLockLimit()) {
+ errMsg =
+ "Operation aborted: Failed to acquire lock due to "
+ "rocksdb_max_row_locks limit";
+ } else {
+ errMsg = s.ToString();
}
if (opt_msg) {
- std::string concatenated_error = s.ToString() + " (" + std::string(opt_msg) + ")";
- my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(), rocksdb_hton_name);
+ std::string concatenated_error = errMsg + " (" + std::string(opt_msg) + ")";
+ my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(),
+ rocksdb_hton_name);
} else {
- my_error(ER_GET_ERRMSG, MYF(0), s.code(), s.ToString().c_str(), rocksdb_hton_name);
+ my_error(ER_GET_ERRMSG, MYF(0), s.code(), errMsg.c_str(),
+ rocksdb_hton_name);
}
return err;
@@ -7081,8 +6963,8 @@ int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s,
static const std::set<uint> RDB_INDEX_COLLATIONS = {
COLLATION_BINARY, COLLATION_UTF8_BIN, COLLATION_LATIN1_BIN};
-static bool
-rdb_is_index_collation_supported(const my_core::Field *const field) {
+static bool rdb_is_index_collation_supported(
+ const my_core::Field *const field) {
const my_core::enum_field_types type = field->real_type();
/* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */
if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING ||
@@ -7142,8 +7024,6 @@ int ha_rocksdb::create_key_defs(
DBUG_ASSERT(table_arg->s != nullptr);
- uint i;
-
/*
These need to be one greater than MAX_INDEXES since the user can create
MAX_INDEXES secondary keys and no primary key which would cause us
@@ -7160,6 +7040,36 @@ int ha_rocksdb::create_key_defs(
DBUG_RETURN(HA_EXIT_FAILURE);
}
+ uint64 ttl_duration = 0;
+ std::string ttl_column;
+ uint ttl_field_offset;
+
+ uint err;
+ if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg,
+ &ttl_duration))) {
+ DBUG_RETURN(err);
+ }
+
+ if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column,
+ &ttl_field_offset))) {
+ DBUG_RETURN(err);
+ }
+
+ /* We don't currently support TTL on tables with hidden primary keys. */
+ if (ttl_duration > 0 && has_hidden_pk(table_arg)) {
+ my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /*
+ If TTL duration is not specified but TTL column was specified, throw an
+ error because TTL column requires duration.
+ */
+ if (ttl_duration == 0 && !ttl_column.empty()) {
+ my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
if (!old_tbl_def_arg) {
/*
old_tbl_def doesn't exist. this means we are in the process of creating
@@ -7168,9 +7078,9 @@ int ha_rocksdb::create_key_defs(
Get the index numbers (this will update the next_index_number)
and create Rdb_key_def structures.
*/
- for (i = 0; i < tbl_def_arg->m_key_count; i++) {
- if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i],
- cfs[i])) {
+ for (uint i = 0; i < tbl_def_arg->m_key_count; i++) {
+ if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], cfs[i],
+ ttl_duration, ttl_column)) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
}
@@ -7181,7 +7091,8 @@ int ha_rocksdb::create_key_defs(
generate the necessary new key definitions if any.
*/
if (create_inplace_key_defs(table_arg, tbl_def_arg, old_table_arg,
- old_tbl_def_arg, cfs)) {
+ old_tbl_def_arg, cfs, ttl_duration,
+ ttl_column)) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
}
@@ -7267,8 +7178,8 @@ int ha_rocksdb::create_cfs(
// Generate the name for the column family to use.
bool per_part_match_found = false;
- std::string cf_name = generate_cf_name(i, table_arg, tbl_def_arg,
- &per_part_match_found);
+ std::string cf_name =
+ generate_cf_name(i, table_arg, tbl_def_arg, &per_part_match_found);
// Prevent create from using the system column family.
if (cf_name == DEFAULT_SYSTEM_CF_NAME) {
@@ -7313,7 +7224,8 @@ int ha_rocksdb::create_cfs(
int ha_rocksdb::create_inplace_key_defs(
const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg,
const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg,
- const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs) const {
+ const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs,
+ uint64 ttl_duration, const std::string &ttl_column) const {
DBUG_ENTER_FUNC();
std::shared_ptr<Rdb_key_def> *const old_key_descr =
@@ -7339,10 +7251,11 @@ int ha_rocksdb::create_inplace_key_defs(
struct Rdb_index_info index_info;
if (!dict_manager.get_index_info(gl_index_id, &index_info)) {
// NO_LINT_DEBUG
- sql_print_error("RocksDB: Could not get index information "
- "for Index Number (%u,%u), table %s",
- gl_index_id.cf_id, gl_index_id.index_id,
- old_tbl_def_arg->full_tablename().c_str());
+ sql_print_error(
+ "RocksDB: Could not get index information "
+ "for Index Number (%u,%u), table %s",
+ gl_index_id.cf_id, gl_index_id.index_id,
+ old_tbl_def_arg->full_tablename().c_str());
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -7366,7 +7279,7 @@ int ha_rocksdb::create_inplace_key_defs(
dict_manager.get_stats(gl_index_id), index_info.m_index_flags,
ttl_rec_offset, index_info.m_ttl_duration);
} else if (create_key_def(table_arg, i, tbl_def_arg, &new_key_descr[i],
- cfs[i])) {
+ cfs[i], ttl_duration, ttl_column)) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -7516,44 +7429,16 @@ int ha_rocksdb::compare_key_parts(const KEY *const old_key,
0 - Ok
other - error, either given table ddl is not supported by rocksdb or OOM.
*/
-int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
+int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint i,
const Rdb_tbl_def *const tbl_def_arg,
std::shared_ptr<Rdb_key_def> *const new_key_def,
- const struct key_def_cf_info &cf_info) const {
+ const struct key_def_cf_info &cf_info,
+ uint64 ttl_duration,
+ const std::string &ttl_column) const {
DBUG_ENTER_FUNC();
DBUG_ASSERT(*new_key_def == nullptr);
- uint64 ttl_duration = 0;
- std::string ttl_column;
- uint ttl_field_offset;
-
- uint err;
- if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg,
- &ttl_duration))) {
- DBUG_RETURN(err);
- }
-
- if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column,
- &ttl_field_offset))) {
- DBUG_RETURN(err);
- }
-
- /* We don't currently support TTL on tables with hidden primary keys. */
- if (ttl_duration > 0 && is_hidden_pk(i, table_arg, tbl_def_arg)) {
- my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0));
- DBUG_RETURN(HA_EXIT_FAILURE);
- }
-
- /*
- If TTL duration is not specified but TTL column was specified, throw an
- error because TTL column requires duration.
- */
- if (ttl_duration == 0 && !ttl_column.empty()) {
- my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str());
- DBUG_RETURN(HA_EXIT_FAILURE);
- }
-
const uint index_id = ddl_manager.get_and_update_next_number(&dict_manager);
const uint16_t index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST;
uchar index_type;
@@ -7602,7 +7487,8 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
if (!ttl_column.empty()) {
(*new_key_def)->m_ttl_column = ttl_column;
}
-
+ // initialize key_def
+ (*new_key_def)->setup(table_arg, tbl_def_arg);
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -7610,7 +7496,7 @@ int rdb_normalize_tablename(const std::string &tablename,
std::string *const strbuf) {
if (tablename.size() < 2 || tablename[0] != '.' ||
(tablename[1] != FN_LIBCHAR && tablename[1] != FN_LIBCHAR2)) {
- DBUG_ASSERT(0); // We were not passed table name?
+ DBUG_ASSERT(0); // We were not passed table name?
return HA_ERR_ROCKSDB_INVALID_TABLE;
}
@@ -7620,7 +7506,7 @@ int rdb_normalize_tablename(const std::string &tablename,
}
if (pos == std::string::npos) {
- DBUG_ASSERT(0); // We were not passed table name?
+ DBUG_ASSERT(0); // We were not passed table name?
return HA_ERR_ROCKSDB_INVALID_TABLE;
}
@@ -7734,95 +7620,34 @@ int rdb_split_normalized_tablename(const std::string &fullname,
return HA_EXIT_SUCCESS;
}
-/**
- @brief
- create() is called to create a database. The variable name will have the name
- of the table.
-
- @details
- When create() is called you do not need to worry about
- opening the table. Also, the .frm file will have already been
- created so adjusting create_info is not necessary. You can overwrite
- the .frm file at this point if you wish to change the table
- definition, but there are no methods currently provided for doing
- so.
+/*
+ Create a table's Rdb_tbl_def and its Rdb_key_defs and store table information
+ into MyRocks Data Dictionary
+ The method is called during create table/partition, truncate table/partition
- Called from handle.cc by ha_create_table().
+ @param table_name IN table's name formated as
+ 'dbname.tablename'
+ @param table_arg IN sql table
+ @param auto_increment_value IN specified table's auto increment value
@return
HA_EXIT_SUCCESS OK
other HA_ERR error code (can be SE-specific)
-
- @see
- ha_create_table() in handle.cc
*/
-
-int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
- HA_CREATE_INFO *const create_info) {
+int ha_rocksdb::create_table(const std::string &table_name,
+ const TABLE *table_arg,
+ ulonglong auto_increment_value) {
DBUG_ENTER_FUNC();
- DBUG_ASSERT(table_arg != nullptr);
- DBUG_ASSERT(create_info != nullptr);
-
- if (create_info->data_file_name) {
- // DATA DIRECTORY is used to create tables under a specific location
- // outside the MySQL data directory. We don't support this for MyRocks.
- // The `rocksdb_datadir` setting should be used to configure RocksDB data
- // directory.
- print_error(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED,
- MYF(ME_JUST_WARNING));
- DBUG_RETURN(HA_WRONG_CREATE_OPTION);
- }
- if (create_info->index_file_name) {
- // Similar check for INDEX DIRECTORY as well.
- print_error(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED,
- MYF(ME_JUST_WARNING));
- DBUG_RETURN(HA_WRONG_CREATE_OPTION);
- }
- std::string str;
int err;
- THD *const thd = my_core::thd_get_current_thd();
-
- if (get_table_if_exists(name)) {
- if (thd->lex->sql_command == SQLCOM_TRUNCATE) {
- err = delete_table(name);
- if (err != HA_EXIT_SUCCESS) {
- DBUG_RETURN(err);
- }
- } else {
- err = rdb_normalize_tablename(name, &str);
- if (err != HA_EXIT_SUCCESS) {
- DBUG_RETURN(err);
- }
- my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name);
- DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA);
- }
- }
-
- /*
- Construct dbname.tablename ourselves, because parititioning
- passes strings like "./test/t14#P#p0" for individual partitions,
- while table_arg->s->table_name has none of that.
- */
- err = rdb_normalize_tablename(name, &str);
- if (err != HA_EXIT_SUCCESS) {
- DBUG_RETURN(err);
- }
-
- if (contains_foreign_key(thd)) {
- my_error(ER_NOT_SUPPORTED_YET, MYF(0),
- "FOREIGN KEY for the RocksDB storage engine");
- DBUG_RETURN(HA_ERR_UNSUPPORTED);
- }
-
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
/* Create table/key descriptions and put them into the data dictionary */
- m_tbl_def = new Rdb_tbl_def(str);
+ m_tbl_def = new Rdb_tbl_def(table_name);
uint n_keys = table_arg->s->keys;
@@ -7832,6 +7657,9 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
*/
if (has_hidden_pk(table_arg)) {
n_keys += 1;
+ // reset hidden pk id
+ // the starting valid value for hidden pk is 1
+ m_tbl_def->m_hidden_pk_val = 1;
}
m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[n_keys];
@@ -7845,9 +7673,9 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)];
- if (create_info->auto_increment_value) {
+ if (auto_increment_value) {
bool autoinc_upgrade_test = false;
- m_tbl_def->m_auto_incr_val = create_info->auto_increment_value;
+ m_tbl_def->m_auto_incr_val = auto_increment_value;
DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;);
if (!autoinc_upgrade_test) {
auto s = dict_manager.put_auto_incr_val(
@@ -7886,6 +7714,86 @@ error:
}
/**
+ @brief
+ create() is called to create a table. The variable name will have the name
+ of the table.
+
+ @details
+ When create() is called you do not need to worry about
+ opening the table. Also, the .frm file will have already been
+ created so adjusting create_info is not necessary. You can overwrite
+ the .frm file at this point if you wish to change the table
+ definition, but there are no methods currently provided for doing
+ so.
+
+ Called from handle.cc by ha_create_table().
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+
+ @see
+ ha_create_table() in handle.cc
+*/
+
+int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
+ HA_CREATE_INFO *const create_info) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(create_info != nullptr);
+
+ if (create_info->data_file_name) {
+ // DATA DIRECTORY is used to create tables under a specific location
+ // outside the MySQL data directory. We don't support this for MyRocks.
+ // The `rocksdb_datadir` setting should be used to configure RocksDB data
+ // directory.
+ DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED);
+ }
+
+ if (create_info->index_file_name) {
+ // Similar check for INDEX DIRECTORY as well.
+ DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED);
+ }
+
+ int err;
+ /*
+ Construct dbname.tablename ourselves, because parititioning
+ passes strings like "./test/t14#P#p0" for individual partitions,
+ while table_arg->s->table_name has none of that.
+ */
+ std::string str;
+ err = rdb_normalize_tablename(name, &str);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
+ }
+
+ // FOREIGN KEY isn't supported yet
+ THD *const thd = my_core::thd_get_current_thd();
+ if (contains_foreign_key(thd)) {
+ my_error(ER_NOT_SUPPORTED_YET, MYF(0),
+ "FOREIGN KEY for the RocksDB storage engine");
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+
+ // Check whether Data Dictionary contain information
+ Rdb_tbl_def *tbl = ddl_manager.find(str);
+ if (tbl != nullptr) {
+ if (thd->lex->sql_command == SQLCOM_TRUNCATE) {
+ err = delete_table(tbl);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
+ }
+ } else {
+ my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name);
+ DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA);
+ }
+ }
+
+ DBUG_RETURN(create_table(str, table_arg, create_info->auto_increment_value));
+}
+
+/**
@note
This function is used only when the table has not yet been opened, and
keyread_allowed bitmap doesn't have the correct values yet.
@@ -7928,9 +7836,10 @@ bool ha_rocksdb::check_keyread_allowed(uint inx, uint part,
int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
rocksdb::Iterator *const iter,
- const bool &full_key_match,
+ const bool /* unused */,
const rocksdb::Slice &key_slice,
const int64_t ttl_filter_ts) {
+ THD *thd = ha_thd();
/*
We are looking for the first record such that
index_tuple= lookup_tuple.
@@ -7939,6 +7848,9 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
rocksdb_smart_seek(kd.m_is_reverse_cf, iter, key_slice);
while (iter->Valid() && kd.value_matches_prefix(iter->key(), key_slice)) {
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
/*
If TTL is enabled we need to check if the given key has already expired
from the POV of the current transaction. If it has, try going to the next
@@ -7960,9 +7872,10 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
}
int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
- const bool &full_key_match,
+ const bool full_key_match,
const rocksdb::Slice &key_slice,
const int64_t ttl_filter_ts) {
+ THD *thd = ha_thd();
/*
We are looking for record with the biggest t.key such that
t.key < lookup_tuple.
@@ -7970,6 +7883,9 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice);
while (is_valid(m_scan_it)) {
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
/*
We are using full key and we've hit an exact match, or...
@@ -7994,6 +7910,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
const rocksdb::Slice &key_slice,
const int64_t ttl_filter_ts) {
+ THD *thd = ha_thd();
/*
We are looking for the first record such that
@@ -8011,6 +7928,9 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
*/
while (is_valid(m_scan_it) && kd.has_ttl() &&
should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) {
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it);
}
@@ -8019,7 +7939,7 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
int ha_rocksdb::position_to_correct_key(
const Rdb_key_def &kd, const enum ha_rkey_function &find_flag,
- const bool &full_key_match, const uchar *const key,
+ const bool full_key_match, const uchar *const key,
const key_part_map &keypart_map, const rocksdb::Slice &key_slice,
bool *const move_forward, const int64_t ttl_filter_ts) {
int rc = 0;
@@ -8027,65 +7947,66 @@ int ha_rocksdb::position_to_correct_key(
*move_forward = true;
switch (find_flag) {
- case HA_READ_KEY_EXACT:
- rc =
- read_key_exact(kd, m_scan_it, full_key_match, key_slice, ttl_filter_ts);
- break;
- case HA_READ_BEFORE_KEY:
- *move_forward = false;
- rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
- if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
- /* The record we've got is not from this index */
- rc = HA_ERR_KEY_NOT_FOUND;
- }
- break;
- case HA_READ_AFTER_KEY:
- case HA_READ_KEY_OR_NEXT:
- rc = read_after_key(kd, key_slice, ttl_filter_ts);
- if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
- /* The record we've got is not from this index */
- rc = HA_ERR_KEY_NOT_FOUND;
- }
- break;
- case HA_READ_KEY_OR_PREV:
- case HA_READ_PREFIX:
- /* This flag is not used by the SQL layer, so we don't support it yet. */
- rc = HA_ERR_UNSUPPORTED;
- break;
- case HA_READ_PREFIX_LAST:
- case HA_READ_PREFIX_LAST_OR_PREV:
- *move_forward = false;
- /*
- Find the last record with the specified index prefix lookup.
- - HA_READ_PREFIX_LAST requires that the record has the
- prefix=lookup (if there are no such records,
- HA_ERR_KEY_NOT_FOUND should be returned).
- - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no
- records with prefix=lookup, we should return the last record
- before that.
- */
- rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
- if (rc == 0) {
- const rocksdb::Slice &rkey = m_scan_it->key();
- if (!kd.covers_key(rkey)) {
+ case HA_READ_KEY_EXACT:
+ rc = read_key_exact(kd, m_scan_it, full_key_match, key_slice,
+ ttl_filter_ts);
+ break;
+ case HA_READ_BEFORE_KEY:
+ *move_forward = false;
+ rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
+ if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
+ /* The record we've got is not from this index */
+ rc = HA_ERR_KEY_NOT_FOUND;
+ }
+ break;
+ case HA_READ_AFTER_KEY:
+ case HA_READ_KEY_OR_NEXT:
+ rc = read_after_key(kd, key_slice, ttl_filter_ts);
+ if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
/* The record we've got is not from this index */
rc = HA_ERR_KEY_NOT_FOUND;
- } else if (find_flag == HA_READ_PREFIX_LAST) {
- uint size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
- key, keypart_map);
- rocksdb::Slice lookup_tuple(reinterpret_cast<char *>(m_sk_packed_tuple),
- size);
-
- // We need to compare the key we've got with the original search prefix.
- if (!kd.value_matches_prefix(rkey, lookup_tuple)) {
+ }
+ break;
+ case HA_READ_KEY_OR_PREV:
+ case HA_READ_PREFIX:
+ /* This flag is not used by the SQL layer, so we don't support it yet. */
+ rc = HA_ERR_UNSUPPORTED;
+ break;
+ case HA_READ_PREFIX_LAST:
+ case HA_READ_PREFIX_LAST_OR_PREV:
+ *move_forward = false;
+ /*
+ Find the last record with the specified index prefix lookup.
+ - HA_READ_PREFIX_LAST requires that the record has the
+ prefix=lookup (if there are no such records,
+ HA_ERR_KEY_NOT_FOUND should be returned).
+ - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no
+ records with prefix=lookup, we should return the last record
+ before that.
+ */
+ rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
+ if (rc == 0) {
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ if (!kd.covers_key(rkey)) {
+ /* The record we've got is not from this index */
rc = HA_ERR_KEY_NOT_FOUND;
+ } else if (find_flag == HA_READ_PREFIX_LAST) {
+ uint size = kd.pack_index_tuple(table, m_pack_buffer,
+ m_sk_packed_tuple, key, keypart_map);
+ rocksdb::Slice lookup_tuple(
+ reinterpret_cast<char *>(m_sk_packed_tuple), size);
+
+ // We need to compare the key we've got with the original search
+ // prefix.
+ if (!kd.value_matches_prefix(rkey, lookup_tuple)) {
+ rc = HA_ERR_KEY_NOT_FOUND;
+ }
}
}
- }
- break;
- default:
- DBUG_ASSERT(0);
- break;
+ break;
+ default:
+ DBUG_ASSERT(0);
+ break;
}
return rc;
@@ -8094,11 +8015,10 @@ int ha_rocksdb::position_to_correct_key(
int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd,
const enum ha_rkey_function &find_flag,
const rocksdb::Slice &slice,
- const int &bytes_changed_by_succ,
+ const int bytes_changed_by_succ,
const key_range *const end_key,
uint *const end_key_packed_size) {
- if (find_flag == HA_READ_KEY_EXACT)
- return slice.size();
+ if (find_flag == HA_READ_KEY_EXACT) return slice.size();
if (find_flag == HA_READ_PREFIX_LAST) {
/*
@@ -8168,8 +8088,18 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
const rocksdb::Slice &rkey = m_scan_it->key();
const rocksdb::Slice &value = m_scan_it->value();
- bool covered_lookup =
- m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap);
+#ifndef DBUG_OFF
+ bool save_keyread_only = m_keyread_only;
+#endif
+ DBUG_EXECUTE_IF("dbug.rocksdb.HA_EXTRA_KEYREAD", { m_keyread_only = true; });
+
+ bool covered_lookup = (m_keyread_only && kd.can_cover_lookup()) ||
+ kd.covers_lookup(&value, &m_lookup_bitmap);
+
+#ifndef DBUG_OFF
+ m_keyread_only = save_keyread_only;
+#endif
+
if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
pk_size =
kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
@@ -8177,12 +8107,11 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
} else {
rc = kd.unpack_record(table, buf, &rkey, &value,
- m_verify_row_debug_checksums);
+ m_converter->get_verify_row_debug_checksums());
global_stats.covered_secondary_key_lookups.inc();
}
} else {
- if (kd.m_is_reverse_cf)
- move_forward = !move_forward;
+ if (kd.m_is_reverse_cf) move_forward = !move_forward;
rc = find_icp_matching_index_rec(move_forward, buf);
if (!rc) {
@@ -8229,11 +8158,12 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const {
DBUG_ENTER_FUNC();
- ulong base_flags = HA_READ_NEXT | // doesn't seem to be used
+ ulong base_flags = HA_READ_NEXT | // doesn't seem to be used
HA_READ_ORDER | HA_READ_RANGE | HA_READ_PREV;
- if (check_keyread_allowed(inx, part, all_parts))
+ if (check_keyread_allowed(inx, part, all_parts)) {
base_flags |= HA_KEYREAD_ONLY;
+ }
if (inx == table_share->primary_key) {
/*
@@ -8289,13 +8219,15 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
rocksdb::Slice value = m_scan_it->value();
bool covered_lookup =
- m_keyread_only || m_key_descr_arr[keyno]->covers_lookup(
- table, &value, &m_lookup_bitmap);
+ (m_keyread_only && m_key_descr_arr[keyno]->can_cover_lookup()) ||
+ m_key_descr_arr[keyno]->covers_lookup(&value, &m_lookup_bitmap);
if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
rc = m_key_descr_arr[keyno]->unpack_record(
- table, buf, &key, &value, m_verify_row_debug_checksums);
+ table, buf, &key, &value,
+ m_converter->get_verify_row_debug_checksums());
global_stats.covered_secondary_key_lookups.inc();
} else {
+ DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete_sk");
rc = get_row_by_rowid(buf, m_pk_packed_tuple, size);
}
@@ -8345,9 +8277,10 @@ int ha_rocksdb::read_range_first(const key_range *const start_key,
range_key_part = table->key_info[active_index].key_part;
- if (!start_key) // Read first record
+ if (!start_key) {
+ // Read first record
result = ha_index_first(table->record[0]);
- else {
+ } else {
#ifdef MARIAROCKS_NOT_YET
if (is_using_prohibited_gap_locks(
is_using_full_unique_key(active_index, start_key->keypart_map,
@@ -8363,8 +8296,9 @@ int ha_rocksdb::read_range_first(const key_range *const start_key,
start_key->keypart_map, start_key->flag, end_key);
})
}
- if (result)
+ if (result) {
DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) ? HA_ERR_END_OF_FILE : result);
+ }
if (compare_key(end_range) <= 0) {
DBUG_RETURN(HA_EXIT_SUCCESS);
@@ -8441,14 +8375,23 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
const key_range *end_key) {
DBUG_ENTER_FUNC();
+ DBUG_EXECUTE_IF("myrocks_busy_loop_on_row_read", int debug_i = 0;
+ while (1) { debug_i++; });
+
int rc = 0;
+ THD *thd = ha_thd();
+ DEBUG_SYNC(thd, "rocksdb.check_flags_rmi");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ DBUG_RETURN(rc);
+ }
+
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
const uint actual_key_parts = kd.get_key_parts();
bool using_full_key = is_using_full_key(keypart_map, actual_key_parts);
- if (!end_key)
- end_key = end_range;
+ if (!end_key) end_key = end_range;
/* By default, we don't need the retrieved records to match the prefix */
m_sk_match_prefix = nullptr;
@@ -8465,8 +8408,7 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
m_pk_packed_tuple, key, keypart_map);
bool skip_lookup = is_blind_delete_enabled();
- rc = get_row_by_rowid(buf, m_pk_packed_tuple, size,
- skip_lookup, false);
+ rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, skip_lookup, false);
if (!rc && !skip_lookup) {
#ifdef MARIAROCKS_NOT_YET
@@ -8491,8 +8433,9 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
key, tmp_map);
if (table->key_info[active_index].user_defined_key_parts !=
- kd.get_key_parts())
+ kd.get_key_parts()) {
using_full_key = false;
+ }
} else {
packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
key, keypart_map);
@@ -8546,14 +8489,20 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
bool use_all_keys = false;
if (find_flag == HA_READ_KEY_EXACT &&
- my_count_bits(keypart_map) == kd.get_key_parts())
+ my_count_bits(keypart_map) == kd.get_key_parts()) {
use_all_keys = true;
+ }
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
const bool is_new_snapshot = !tx->has_snapshot();
// Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this)
for (;;) {
+ DEBUG_SYNC(thd, "rocksdb.check_flags_rmi_scan");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ break;
+ }
/*
This will open the iterator and position it at a record that's equal or
greater than the lookup tuple.
@@ -8570,9 +8519,7 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
slice, &move_forward, tx->m_snapshot_timestamp);
if (rc) {
- /* This status is returned on any error */
- table->status = STATUS_NOT_FOUND;
- DBUG_RETURN(rc);
+ break;
}
m_skip_scan_it_next_call = false;
@@ -8582,13 +8529,15 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
then we have all the rows we need. For a secondary key we now need to
lookup the primary key.
*/
- if (active_index == table->s->primary_key)
+ if (active_index == table->s->primary_key) {
rc = read_row_from_primary_key(buf);
- else
+ } else {
rc = read_row_from_secondary_key(buf, kd, move_forward);
+ }
- if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot)
+ if (!should_recreate_snapshot(rc, is_new_snapshot)) {
break; /* Exit the loop */
+ }
// release the snapshot and iterator so they will be regenerated
tx->release_snapshot();
@@ -8596,7 +8545,10 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
}
if (rc) {
- /* the only possible error condition is record-not-found */
+ /*
+ This status is returned on any error
+ the only possible error condition is record-not-found
+ */
table->status = STATUS_NOT_FOUND;
} else {
table->status = 0;
@@ -8630,13 +8582,21 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
other - error code
*/
-int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
+int ha_rocksdb::find_icp_matching_index_rec(const bool move_forward,
uchar *const buf) {
if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) {
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+ THD *thd = ha_thd();
while (1) {
- rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
+ int rc = rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+
+ if (thd && thd->killed) {
+ return HA_ERR_QUERY_INTERRUPTED;
+ }
if (!is_valid(m_scan_it)) {
table->status = STATUS_NOT_FOUND;
@@ -8660,7 +8620,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
const rocksdb::Slice value = m_scan_it->value();
int err = kd.unpack_record(table, buf, &rkey, &value,
- m_verify_row_debug_checksums);
+ m_converter->get_verify_row_debug_checksums());
if (err != HA_EXIT_SUCCESS) {
return err;
}
@@ -8681,14 +8641,6 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
* row. */
break;
}
- /*
- TODO: should we have this here, or RockDB handles this internally?
- if (my_core::thd_killed(current_thd))
- {
- rc= HA_ERR_INTERNAL_ERROR; // doesn't matter
- break;
- }
- */
}
}
return HA_EXIT_SUCCESS;
@@ -8720,14 +8672,14 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
// Rdb_transaction *tx= get_or_create_tx(table->in_use);
// tx->snapshot= nullptr;
- const bool save_verify_row_debug_checksums = m_verify_row_debug_checksums;
- m_verify_row_debug_checksums = true;
-
+ bool save_verify_row_debug_checksums =
+ m_converter->get_verify_row_debug_checksums();
+ m_converter->set_verify_row_debug_checksums(true);
/* For each secondary index, check that we can get a PK value from it */
// NO_LINT_DEBUG
sql_print_verbose_info("CHECKTABLE %s: Checking table %s", table_name,
table_name);
- ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true
+ ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true
ha_rows row_checksums = ha_rows(-1);
bool first_index = true;
@@ -8737,20 +8689,21 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
ha_index_init(keyno, true);
ha_rows rows = 0;
ha_rows checksums = 0;
- if (first_index)
- row_checksums_at_start = m_row_checksums_checked;
+ if (first_index) {
+ row_checksums_at_start = m_converter->get_row_checksums_checked();
+ }
int res;
// NO_LINT_DEBUG
sql_print_verbose_info("CHECKTABLE %s: Checking index %s", table_name,
table->key_info[keyno].name);
while (1) {
- if (!rows)
+ if (!rows) {
res = index_first(table->record[0]);
- else
+ } else {
res = index_next(table->record[0]);
+ }
- if (res == HA_ERR_END_OF_FILE)
- break;
+ if (res == HA_ERR_END_OF_FILE) break;
if (res) {
// error
// NO_LINT_DEBUG
@@ -8771,16 +8724,18 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
if ((res = get_row_by_rowid(table->record[0], rowkey_copy.ptr(),
rowkey_copy.length()))) {
// NO_LINT_DEBUG
- sql_print_error("CHECKTABLE %s: .. row %lld: "
- "failed to fetch row by rowid",
- table_name, rows);
+ sql_print_error(
+ "CHECKTABLE %s: .. row %lld: "
+ "failed to fetch row by rowid",
+ table_name, rows);
goto error;
}
longlong hidden_pk_id = 0;
if (has_hidden_pk(table) &&
- read_hidden_pk_id_from_rowkey(&hidden_pk_id))
+ read_hidden_pk_id_from_rowkey(&hidden_pk_id)) {
goto error;
+ }
/* Check if we get the same PK value */
uint packed_size = m_pk_descr->pack_record(
@@ -8801,9 +8756,10 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
if (packed_size != sec_key_copy.length() ||
memcmp(m_sk_packed_tuple, sec_key_copy.ptr(), packed_size)) {
// NO_LINT_DEBUG
- sql_print_error("CHECKTABLE %s: .. row %lld: "
- "secondary index value mismatch",
- table_name, rows);
+ sql_print_error(
+ "CHECKTABLE %s: .. row %lld: "
+ "secondary index value mismatch",
+ table_name, rows);
goto print_and_error;
}
rows++;
@@ -8835,7 +8791,8 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
table_name, rows, checksums);
if (first_index) {
- row_checksums = m_row_checksums_checked - row_checksums_at_start;
+ row_checksums =
+ m_converter->get_row_checksums_checked() - row_checksums_at_start;
first_index = false;
}
ha_index_end();
@@ -8848,16 +8805,16 @@ int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
}
extra(HA_EXTRA_NO_KEYREAD);
- m_verify_row_debug_checksums = save_verify_row_debug_checksums;
+ m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums);
/*
- TODO: we should check also for PK records that are missing in the secondary
- indexes.
+ TODO(sergiy): we should check also for PK records that are missing in
+ the secondary indexes.
For that, need to walk through the PK and check that every PK record has a
proper counterpart in each secondary index.
*/
DBUG_RETURN(HA_ADMIN_OK);
error:
- m_verify_row_debug_checksums = save_verify_row_debug_checksums;
+ m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums);
ha_index_or_rnd_end();
extra(HA_EXTRA_NO_KEYREAD);
@@ -8867,10 +8824,11 @@ error:
static void dbug_dump_str(FILE *const out, const char *const str, int len) {
fprintf(out, "\"");
for (int i = 0; i < len; i++) {
- if (str[i] > 32)
+ if (str[i] > 32) {
fprintf(out, "%c", str[i]);
- else
+ } else {
fprintf(out, "\\%d", str[i]);
+ }
}
fprintf(out, "\"");
}
@@ -8883,8 +8841,7 @@ static void dbug_dump_str(FILE *const out, const char *const str, int len) {
void dbug_dump_database(rocksdb::DB *const db) {
FILE *const out = fopen("/tmp/rocksdb.dump", "wt");
- if (!out)
- return;
+ if (!out) return;
rocksdb::Iterator *it = db->NewIterator(rocksdb::ReadOptions());
for (it->SeekToFirst(); it->Valid(); it->Next()) {
@@ -8906,30 +8863,28 @@ rocksdb::Status ha_rocksdb::get_for_update(
DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE);
const bool exclusive = m_lock_rows != RDB_LOCK_READ;
- rocksdb::Status s = tx->get_for_update(column_family, key, value, exclusive);
-
- // If we have a lock conflict and we are running in READ COMMITTTED mode
- // release and reacquire the snapshot and then retry the get_for_update().
- if (s.IsBusy() && !s.IsDeadlock() &&
- my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) {
- tx->release_snapshot();
- tx->acquire_snapshot(false);
-
- s = tx->get_for_update(column_family, key, value, exclusive);
- }
+ const bool do_validate =
+ my_core::thd_tx_isolation(ha_thd()) > ISO_READ_COMMITTED;
+ rocksdb::Status s =
+ tx->get_for_update(column_family, key, value, exclusive, do_validate);
+#ifndef DBUG_OFF
+ ++rocksdb_num_get_for_update_calls;
+#endif
return s;
}
-bool ha_rocksdb::is_blind_delete_enabled()
-{
+bool ha_rocksdb::is_blind_delete_enabled() {
THD *thd = ha_thd();
+ /*
+ Note: in MariaDB, thd->lex->table_count is only set for multi-table DELETE,
+ not for single-table DELETE. So we check thd->lex->query_tables instead.
+ */
return (THDVAR(thd, blind_delete_primary_key) &&
thd->lex->sql_command == SQLCOM_DELETE &&
- thd->lex->table_count == 1 &&
+ thd->lex->query_tables && !thd->lex->query_tables->next_global &&
table->s->keys == 1 &&
- !has_hidden_pk(table) &&
- !thd->rgi_slave);
+ !has_hidden_pk(table) && !thd->rgi_slave);
}
/*
@@ -8957,8 +8912,9 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
DEBUG_SYNC(ha_thd(), "rocksdb.get_row_by_rowid");
DBUG_EXECUTE_IF("dbug.rocksdb.get_row_by_rowid", {
THD *thd = ha_thd();
- const char act[] = "now signal Reached "
- "wait_for signal.rocksdb.get_row_by_rowid_let_running";
+ const char act[] =
+ "now signal Reached "
+ "wait_for signal.rocksdb.get_row_by_rowid_let_running";
DBUG_ASSERT(opt_debug_sync_timeout > 0);
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
};);
@@ -8967,8 +8923,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
rocksdb::Status s;
/* Pretend row found without looking up */
- if (skip_lookup)
- {
+ if (skip_lookup) {
#ifdef MARIAROCKS_NOT_YET
stats.rows_deleted_blind++;
#endif
@@ -8981,6 +8936,17 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
if (m_lock_rows == RDB_LOCK_NONE) {
tx->acquire_snapshot(true);
s = tx->get(m_pk_descr->get_cf(), key_slice, &m_retrieved_record);
+ } else if (m_insert_with_update && m_dup_pk_found) {
+ DBUG_ASSERT(m_pk_descr->get_keyno() == m_dupp_errkey);
+ DBUG_ASSERT(m_dup_pk_retrieved_record.length() ==
+ m_retrieved_record.size());
+ DBUG_ASSERT(memcmp(m_dup_pk_retrieved_record.ptr(),
+ m_retrieved_record.data(),
+ m_retrieved_record.size()) == 0);
+
+ // do nothing - we already have the result in m_retrieved_record and
+ // already taken the lock
+ s = rocksdb::Status::OK();
} else {
s = get_for_update(tx, m_pk_descr->get_cf(), key_slice,
&m_retrieved_record);
@@ -9035,8 +9001,7 @@ int ha_rocksdb::index_next(uchar *const buf) {
}
int rc = index_next_with_direction(buf, moves_forward);
- if (rc == HA_ERR_KEY_NOT_FOUND)
- rc = HA_ERR_END_OF_FILE;
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
DBUG_RETURN(rc);
}
@@ -9055,8 +9020,7 @@ int ha_rocksdb::index_prev(uchar *const buf) {
}
int rc = index_next_with_direction(buf, moves_forward);
- if (rc == HA_ERR_KEY_NOT_FOUND)
- rc = HA_ERR_END_OF_FILE;
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
DBUG_RETURN(rc);
}
@@ -9069,19 +9033,33 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
if (active_index == pk_index(table, m_tbl_def)) {
rc = rnd_next_with_direction(buf, move_forward);
} else {
- if (m_skip_scan_it_next_call) {
- m_skip_scan_it_next_call = false;
- } else {
- if (move_forward)
- m_scan_it->Next(); /* this call cannot fail */
- else
- m_scan_it->Prev();
+ THD *thd = ha_thd();
+ for (;;) {
+ DEBUG_SYNC(thd, "rocksdb.check_flags_inwd");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ break;
+ }
+ if (m_skip_scan_it_next_call) {
+ m_skip_scan_it_next_call = false;
+ } else {
+ if (move_forward) {
+ m_scan_it->Next(); /* this call cannot fail */
+ } else {
+ m_scan_it->Prev();
+ }
+ }
+ rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index],
+ m_scan_it, !move_forward);
+ if (rc != HA_EXIT_SUCCESS) {
+ break;
+ }
+ rc = find_icp_matching_index_rec(move_forward, buf);
+ if (!rc) rc = secondary_index_read(active_index, buf);
+ if (!should_skip_invalidated_record(rc)) {
+ break;
+ }
}
- rocksdb_skip_expired_records(*m_key_descr_arr[active_index], m_scan_it,
- !move_forward);
- rc = find_icp_matching_index_rec(move_forward, buf);
- if (!rc)
- rc = secondary_index_read(active_index, buf);
}
DBUG_RETURN(rc);
@@ -9099,8 +9077,7 @@ int ha_rocksdb::index_first(uchar *const buf) {
int rc = m_key_descr_arr[active_index]->m_is_reverse_cf
? index_last_intern(buf)
: index_first_intern(buf);
- if (rc == HA_ERR_KEY_NOT_FOUND)
- rc = HA_ERR_END_OF_FILE;
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
DBUG_RETURN(rc);
}
@@ -9117,8 +9094,7 @@ int ha_rocksdb::index_last(uchar *const buf) {
int rc = m_key_descr_arr[active_index]->m_is_reverse_cf
? index_first_intern(buf)
: index_last_intern(buf);
- if (rc == HA_ERR_KEY_NOT_FOUND)
- rc = HA_ERR_END_OF_FILE;
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
DBUG_RETURN(rc);
}
@@ -9186,8 +9162,9 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
m_skip_scan_it_next_call = true;
rc = index_next_with_direction(buf, true);
- if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot)
- break; // exit the loop
+ if (!should_recreate_snapshot(rc, is_new_snapshot)) {
+ break; /* exit the loop */
+ }
// release the snapshot and iterator so they will be regenerated
tx->release_snapshot();
@@ -9261,11 +9238,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) {
DBUG_ASSERT(key != nullptr);
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
- if (kd.m_is_reverse_cf) {
- kd.get_infimum_key(key, &key_size);
- } else {
- kd.get_supremum_key(key, &key_size);
- }
+ int key_end_matching_bytes = kd.get_last_key(key, &key_size);
rocksdb::Slice index_key((const char *)key, key_size);
@@ -9276,7 +9249,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) {
// Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this)
for (;;) {
- setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE);
+ setup_scan_iterator(kd, &index_key, false, key_end_matching_bytes);
m_scan_it->SeekForPrev(index_key);
m_skip_scan_it_next_call = false;
@@ -9285,12 +9258,12 @@ int ha_rocksdb::index_last_intern(uchar *const buf) {
rc = rnd_next_with_direction(buf, false);
} else {
rc = find_icp_matching_index_rec(false /*move_forward*/, buf);
- if (!rc)
- rc = secondary_index_read(active_index, buf);
+ if (!rc) rc = secondary_index_read(active_index, buf);
}
- if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !is_new_snapshot)
+ if (!should_recreate_snapshot(rc, is_new_snapshot)) {
break; /* exit the loop */
+ }
// release the snapshot and iterator so they will be regenerated
tx->release_snapshot();
@@ -9329,7 +9302,7 @@ void ha_rocksdb::unlock_row() {
- If the index is PRIMARY KEY, and if all of the columns of the table
are covered by the PRIMARY KEY, SingleDelete can be used.
*/
-bool ha_rocksdb::can_use_single_delete(const uint &index) const {
+bool ha_rocksdb::can_use_single_delete(const uint index) const {
return (index != pk_index(table, m_tbl_def) ||
(!has_hidden_pk(table) &&
table->key_info[index].ext_key_parts == table->s->fields));
@@ -9345,12 +9318,18 @@ bool ha_rocksdb::skip_unique_check() const {
3) the user set unique_checks option to 0, and the table does not have
any indexes. If the table has secondary keys, then those might becomes
inconsisted/corrupted
+ 4) We're using read-free replication
*/
return THDVAR(table->in_use, bulk_load) ||
(m_force_skip_unique_check && m_skip_unique_check) ||
(my_core::thd_test_options(table->in_use,
OPTION_RELAXED_UNIQUE_CHECKS) &&
- m_tbl_def->m_key_count == 1);
+ m_tbl_def->m_key_count == 1) ||
+#ifdef MARIAROCKS_NOT_YET
+ use_read_free_rpl();
+#else
+ FALSE;
+#endif
}
#ifdef MARIAROCKS_NOT_YET // MDEV-10975
@@ -9450,10 +9429,9 @@ const char *ha_rocksdb::get_key_comment(const uint index,
return table_arg->key_info[index].comment.str;
}
-const std::string ha_rocksdb::generate_cf_name(const uint index,
- const TABLE *const table_arg,
- const Rdb_tbl_def *const tbl_def_arg,
- bool *per_part_match_found) {
+const std::string ha_rocksdb::generate_cf_name(
+ const uint index, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found) {
DBUG_ASSERT(table_arg != nullptr);
DBUG_ASSERT(tbl_def_arg != nullptr);
DBUG_ASSERT(per_part_match_found != nullptr);
@@ -9498,6 +9476,9 @@ const std::string ha_rocksdb::get_table_comment(const TABLE *const table_arg) {
}
/**
+ Write a new row
+
+ @param[in] buf new row data to write
@return
HA_EXIT_SUCCESS OK
other HA_ERR error code (can be SE-specific)
@@ -9520,6 +9501,11 @@ int ha_rocksdb::write_row(uchar *const buf) {
}
}
+ // clear cache at beginning of write for INSERT ON DUPLICATE
+ // we may get multiple write->fail->read->update if there are multiple
+ // values from INSERT
+ m_dup_pk_found = false;
+
const int rv = update_write_row(nullptr, buf, skip_unique_check());
if (rv == 0) {
@@ -9552,6 +9538,15 @@ void ha_rocksdb::set_last_rowkey(const uchar *const old_data) {
#endif
}
+/**
+ Collect update data for primary key
+
+ @param[in, out] row_info hold all data for update row, such as
+ new row data/old row data
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) {
int size;
@@ -9564,10 +9559,9 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) {
row_info->new_pk_unpack_info = &m_pk_unpack_info;
- size =
- m_pk_descr->pack_record(table, m_pack_buffer, row_info->new_data,
- m_pk_packed_tuple, row_info->new_pk_unpack_info,
- false, 0, 0, nullptr, &row_info->ttl_pk_offset);
+ size = m_pk_descr->pack_record(
+ table, m_pack_buffer, row_info->new_data, m_pk_packed_tuple,
+ row_info->new_pk_unpack_info, false, 0, 0, nullptr);
} else if (row_info->old_data == nullptr) {
row_info->hidden_pk_id = update_hidden_pk_val();
size =
@@ -9591,30 +9585,32 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) {
return HA_EXIT_SUCCESS;
}
-int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id,
+/**
+ Check the specified primary key value is unique and also lock the row
+
+ @param[in] key_id key index
+ @param[in] row_info hold all data for update row, such as old row
+ data and new row data
+  @param[out] found      whether the primary key value already exists.
+ @param[out] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::check_and_lock_unique_pk(const uint key_id,
const struct update_row_info &row_info,
- bool *const found,
- bool *const pk_changed) {
+ bool *const found) {
DBUG_ASSERT(found != nullptr);
- DBUG_ASSERT(pk_changed != nullptr);
- *pk_changed = false;
-
- /*
- For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs
- always require locking.
- */
- if (row_info.old_pk_slice.size() > 0) {
- /*
- If the keys are the same, then no lock is needed
- */
- if (!row_info.new_pk_slice.compare(row_info.old_pk_slice)) {
- *found = false;
- return HA_EXIT_SUCCESS;
- }
+ DBUG_ASSERT(row_info.old_pk_slice.size() == 0 ||
+ row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0);
- *pk_changed = true;
- }
+ /* Ignore PK violations if this is a optimized 'replace into' */
+#ifdef MARIAROCKS_NOT_YET
+ const bool ignore_pk_unique_check = ha_thd()->lex->blind_replace_into;
+#else
+ const bool ignore_pk_unique_check= false;
+#endif
/*
Perform a read to determine if a duplicate entry exists. For primary
@@ -9637,17 +9633,56 @@ int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id,
*/
const rocksdb::Status s =
get_for_update(row_info.tx, m_pk_descr->get_cf(), row_info.new_pk_slice,
- &m_retrieved_record);
+ ignore_pk_unique_check ? nullptr : &m_retrieved_record);
if (!s.ok() && !s.IsNotFound()) {
return row_info.tx->set_status_error(
table->in_use, s, *m_key_descr_arr[key_id], m_tbl_def, m_table_handler);
}
- *found = !s.IsNotFound();
+ bool key_found = ignore_pk_unique_check ? false : !s.IsNotFound();
+
+ /*
+ If the pk key has ttl, we may need to pretend the row wasn't
+ found if it is already expired.
+ */
+ if (key_found && m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
+ (row_info.tx->m_snapshot_timestamp
+ ? row_info.tx->m_snapshot_timestamp
+ : static_cast<int64_t>(std::time(nullptr))))) {
+ key_found = false;
+ }
+
+ if (key_found && row_info.old_data == nullptr && m_insert_with_update) {
+ // In INSERT ON DUPLICATE KEY UPDATE ... case, if the insert failed
+ // due to a duplicate key, remember the last key and skip the check
+ // next time
+ m_dup_pk_found = true;
+
+#ifndef DBUG_OFF
+ // save it for sanity checking later
+ m_dup_pk_retrieved_record.copy(m_retrieved_record.data(),
+ m_retrieved_record.size(), &my_charset_bin);
+#endif
+ }
+
+ *found = key_found;
+
return HA_EXIT_SUCCESS;
}
-int ha_rocksdb::check_and_lock_sk(const uint &key_id,
+/**
+ Check the specified secondary key value is unique and also lock the row
+
+ @param[in] key_id key index
+ @param[in] row_info hold all data for update row, such as old row
+ data and new row data
+  @param[out] found      whether the specified key value already exists.
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::check_and_lock_sk(const uint key_id,
const struct update_row_info &row_info,
bool *const found) {
DBUG_ASSERT(found != nullptr);
@@ -9777,8 +9812,18 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id,
return HA_EXIT_SUCCESS;
}
+/**
+  Enumerate all keys to check their uniqueness and also lock them
+
+ @param[in] row_info hold all data for update row, such as old row
+ data and new row data
+ @param[out] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::check_uniqueness_and_lock(
- const struct update_row_info &row_info, bool *const pk_changed) {
+ const struct update_row_info &row_info, bool pk_changed) {
/*
Go through each index and determine if the index has uniqueness
requirements. If it does, then try to obtain a row lock on the new values.
@@ -9790,7 +9835,12 @@ int ha_rocksdb::check_uniqueness_and_lock(
int rc;
if (is_pk(key_id, table, m_tbl_def)) {
- rc = check_and_lock_unique_pk(key_id, row_info, &found, pk_changed);
+ if (row_info.old_pk_slice.size() > 0 && !pk_changed) {
+ found = false;
+ rc = HA_EXIT_SUCCESS;
+ } else {
+ rc = check_and_lock_unique_pk(key_id, row_info, &found);
+ }
} else {
rc = check_and_lock_sk(key_id, row_info, &found);
}
@@ -9799,23 +9849,11 @@ int ha_rocksdb::check_uniqueness_and_lock(
return rc;
}
- /*
- If the pk key has ttl, we may need to pretend the row wasn't
- found if it is already expired. The pk record is read into
- m_retrieved_record by check_and_lock_unique_pk().
- */
- if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() &&
- should_hide_ttl_rec(*m_pk_descr, m_retrieved_record,
- (row_info.tx->m_snapshot_timestamp
- ? row_info.tx->m_snapshot_timestamp
- : static_cast<int64_t>(std::time(nullptr))))) {
- found = false;
- }
-
if (found) {
/* There is a row with this key already, so error out. */
errkey = key_id;
m_dupp_errkey = errkey;
+
return HA_ERR_FOUND_DUPP_KEY;
}
}
@@ -9823,19 +9861,31 @@ int ha_rocksdb::check_uniqueness_and_lock(
return HA_EXIT_SUCCESS;
}
+/**
+ Check whether secondary key value is duplicate or not
+
+ @param[in] table_arg the table currently working on
+  @param[in] key_def     the key_def being checked
+  @param[in] key         secondary key storage data
+  @param[out] sk_info    holds secondary key memcmp data (new/old)
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+
int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg,
- const Rdb_key_def &index,
+ const Rdb_key_def &key_def,
const rocksdb::Slice *key,
struct unique_sk_buf_info *sk_info) {
uint n_null_fields = 0;
- const rocksdb::Comparator *index_comp = index.get_cf()->GetComparator();
+ const rocksdb::Comparator *index_comp = key_def.get_cf()->GetComparator();
/* Get proper SK buffer. */
uchar *sk_buf = sk_info->swap_and_get_sk_buf();
/* Get memcmp form of sk without extended pk tail */
uint sk_memcmp_size =
- index.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields);
+ key_def.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields);
sk_info->sk_memcmp_key =
rocksdb::Slice(reinterpret_cast<char *>(sk_buf), sk_memcmp_size);
@@ -9855,12 +9905,17 @@ int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
const rocksdb::Slice &value, bool sort) {
DBUG_ENTER_FUNC();
int res;
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
// In the case of unsorted inserts, m_sst_info allocated here is not
// used to store the keys. It is still used to indicate when tables
// are switched.
- if (m_sst_info == nullptr || m_sst_info->is_committed()) {
+ if (m_sst_info == nullptr || m_sst_info->is_done()) {
m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
kd.get_name(), cf, *rocksdb_db_options,
THDVAR(ha_thd(), trace_sst_api)));
@@ -9893,17 +9948,61 @@ int ha_rocksdb::finalize_bulk_load(bool print_client_error) {
/* Skip if there are no possible ongoing bulk loads */
if (m_sst_info) {
- res = m_sst_info->commit(print_client_error);
+ if (m_sst_info->is_done()) {
+ m_sst_info.reset();
+ DBUG_RETURN(res);
+ }
+
+ Rdb_sst_info::Rdb_sst_commit_info commit_info;
+
+ // Wrap up the current work in m_sst_info and get ready to commit
+ // This transfer the responsibility of commit over to commit_info
+ res = m_sst_info->finish(&commit_info, print_client_error);
+ if (res == 0) {
+ // Make sure we have work to do - under race condition we could lose
+ // to another thread and end up with no work
+ if (commit_info.has_work()) {
+ rocksdb::IngestExternalFileOptions opts;
+ opts.move_files = true;
+ opts.snapshot_consistency = false;
+ opts.allow_global_seqno = false;
+ opts.allow_blocking_flush = false;
+
+ const rocksdb::Status s = rdb->IngestExternalFile(
+ commit_info.get_cf(), commit_info.get_committed_files(), opts);
+ if (!s.ok()) {
+ if (print_client_error) {
+ Rdb_sst_info::report_error_msg(s, nullptr);
+ }
+ res = HA_ERR_ROCKSDB_BULK_LOAD;
+ } else {
+ // Mark the list of SST files as committed, otherwise they'll get
+ // cleaned up when commit_info destructs
+ commit_info.commit();
+ }
+ }
+ }
m_sst_info.reset();
}
DBUG_RETURN(res);
}
-int ha_rocksdb::update_pk(const Rdb_key_def &kd,
- const struct update_row_info &row_info,
- const bool &pk_changed) {
- const uint key_id = kd.get_keyno();
- const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def);
+/**
+ Update an existing primary key record or write a new primary key record
+
+ @param[in] kd the primary key is being update/write
+  @param[in] row_info    holds all row data, such as old row data and
+                         new row data
+ @param[in] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
+int ha_rocksdb::update_write_pk(const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ bool pk_changed) {
+ uint key_id = kd.get_keyno();
+ bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def);
ulonglong bytes_written = 0;
/*
@@ -9931,7 +10030,10 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
int rc = HA_EXIT_SUCCESS;
rocksdb::Slice value_slice;
/* Prepare the new record to be written into RocksDB */
- if ((rc = convert_record_to_storage_format(row_info, &value_slice))) {
+ if ((rc = m_converter->encode_value_slice(
+ m_pk_descr, row_info.new_pk_slice, row_info.new_pk_unpack_info,
+ !row_info.old_pk_slice.empty(), should_store_row_debug_checksums(),
+ m_ttl_bytes, &m_ttl_bytes_updated, &value_slice))) {
return rc;
}
@@ -9951,7 +10053,9 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
row_info.tx->get_indexed_write_batch()->Put(cf, row_info.new_pk_slice,
value_slice);
} else {
- const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice);
+ const bool assume_tracked = can_assume_tracked(ha_thd());
+ const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice,
+ assume_tracked);
if (!s.ok()) {
if (s.IsBusy()) {
errkey = table->s->primary_key;
@@ -9971,9 +10075,22 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
return rc;
}
-int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
- const struct update_row_info &row_info,
- const bool bulk_load_sk) {
+/**
+ update an existing secondary key record or write a new secondary key record
+
+ @param[in] table_arg Table we're working on
+ @param[in] kd The secondary key being update/write
+ @param[in] row_info data structure contains old row data and new row data
+  @param[in] bulk_load_sk whether to use bulk load. Currently it is only
+                          supported for writes
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
+int ha_rocksdb::update_write_sk(const TABLE *const table_arg,
+ const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ const bool bulk_load_sk) {
int new_packed_size;
int old_packed_size;
int rc = HA_EXIT_SUCCESS;
@@ -9995,19 +10112,18 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
return HA_EXIT_SUCCESS;
}
- const bool store_row_debug_checksums = should_store_row_debug_checksums();
-
+ bool store_row_debug_checksums = should_store_row_debug_checksums();
new_packed_size =
kd.pack_record(table_arg, m_pack_buffer, row_info.new_data,
m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums,
- row_info.hidden_pk_id, 0, nullptr, nullptr, m_ttl_bytes);
+ row_info.hidden_pk_id, 0, nullptr, m_ttl_bytes);
if (row_info.old_data != nullptr) {
// The old value
old_packed_size = kd.pack_record(
table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old,
&m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0,
- nullptr, nullptr, m_ttl_bytes);
+ nullptr, m_ttl_bytes);
/*
Check if we are going to write the same value. This can happen when
@@ -10067,13 +10183,22 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
return rc;
}
-int ha_rocksdb::update_indexes(const struct update_row_info &row_info,
- const bool &pk_changed) {
+/**
+ Update existing indexes(PK/SKs) or write new indexes(PK/SKs)
+
+ @param[in] row_info hold all row data, such as old key/new key
+ @param[in] pk_changed whether primary key is changed
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
+int ha_rocksdb::update_write_indexes(const struct update_row_info &row_info,
+ const bool pk_changed) {
int rc;
bool bulk_load_sk;
// The PK must be updated first to pull out the TTL value.
- rc = update_pk(*m_pk_descr, row_info, pk_changed);
+ rc = update_write_pk(*m_pk_descr, row_info, pk_changed);
if (rc != HA_EXIT_SUCCESS) {
return rc;
}
@@ -10088,7 +10213,8 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info,
continue;
}
- rc = update_sk(table, *m_key_descr_arr[key_id], row_info, bulk_load_sk);
+ rc = update_write_sk(table, *m_key_descr_arr[key_id], row_info,
+ bulk_load_sk);
if (rc != HA_EXIT_SUCCESS) {
return rc;
}
@@ -10097,11 +10223,26 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info,
return HA_EXIT_SUCCESS;
}
+/**
+ Update an existing row or write a new row
+
+ @param[in] old_data nullptr for write, non-null for update
+ @param[in] new_data non-null for write/update
+ @param[in] skip_unique_check whether to check uniqueness
+ @return
+ HA_EXIT_SUCCESS OK
+ Other HA_ERR error code (can be SE-specific)
+ */
int ha_rocksdb::update_write_row(const uchar *const old_data,
const uchar *const new_data,
const bool skip_unique_check) {
DBUG_ENTER_FUNC();
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
bool pk_changed = false;
struct update_row_info row_info;
@@ -10109,7 +10250,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.new_data = new_data;
row_info.skip_unique_check = skip_unique_check;
row_info.new_pk_unpack_info = nullptr;
-
set_last_rowkey(old_data);
row_info.tx = get_or_create_tx(table->in_use);
@@ -10130,12 +10270,20 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
DBUG_RETURN(rc);
}
+ /*
+ For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs
+ always require locking.
+ */
+ if (row_info.old_pk_slice.size() > 0) {
+ pk_changed = row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0;
+ }
+
if (!skip_unique_check) {
/*
Check to see if we are going to have failures because of unique
keys. Also lock the appropriate key values.
*/
- rc = check_uniqueness_and_lock(row_info, &pk_changed);
+ rc = check_uniqueness_and_lock(row_info, pk_changed);
if (rc != HA_EXIT_SUCCESS) {
DBUG_RETURN(rc);
}
@@ -10149,7 +10297,7 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
here because updates to the transaction will be made and those updates
cannot be easily removed without rolling back the entire transaction.
*/
- rc = update_indexes(row_info, pk_changed);
+ rc = update_write_indexes(row_info, pk_changed);
if (rc != HA_EXIT_SUCCESS) {
DBUG_RETURN(rc);
}
@@ -10190,18 +10338,29 @@ void ha_rocksdb::setup_iterator_bounds(
const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, size_t bound_len,
uchar *const lower_bound, uchar *const upper_bound,
rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice) {
- uint min_len = std::min(eq_cond.size(), bound_len);
- memcpy(upper_bound, eq_cond.data(), min_len);
- kd.successor(upper_bound, min_len);
- memcpy(lower_bound, eq_cond.data(), min_len);
- kd.predecessor(lower_bound, min_len);
+ // If eq_cond is shorter than Rdb_key_def::INDEX_NUMBER_SIZE, we should be
+ // able to get better bounds just by using index id directly.
+ if (eq_cond.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) {
+ DBUG_ASSERT(bound_len == Rdb_key_def::INDEX_NUMBER_SIZE);
+ uint size;
+ kd.get_infimum_key(lower_bound, &size);
+ DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE);
+ kd.get_supremum_key(upper_bound, &size);
+ DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE);
+ } else {
+ DBUG_ASSERT(bound_len <= eq_cond.size());
+ memcpy(upper_bound, eq_cond.data(), bound_len);
+ kd.successor(upper_bound, bound_len);
+ memcpy(lower_bound, eq_cond.data(), bound_len);
+ kd.predecessor(lower_bound, bound_len);
+ }
if (kd.m_is_reverse_cf) {
- *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, min_len);
- *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, min_len);
+ *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len);
+ *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len);
} else {
- *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, min_len);
- *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, min_len);
+ *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len);
+ *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len);
}
}
@@ -10220,8 +10379,17 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
bool skip_bloom = true;
const rocksdb::Slice eq_cond(slice->data(), eq_cond_len);
+ // The size of m_scan_it_lower_bound (and upper) is technically
+ // max_packed_sk_len as calculated in ha_rocksdb::alloc_key_buffers. Rather
+ // than recalculating that number, we pass in the max of eq_cond_len and
+ // Rdb_key_def::INDEX_NUMBER_SIZE which is guaranteed to be smaller than
+ // max_packed_sk_len, hence ensuring no buffer overrun.
+ //
+ // See ha_rocksdb::setup_iterator_bounds on how the bound_len parameter is
+ // used.
if (check_bloom_and_set_bounds(
- ha_thd(), kd, eq_cond, use_all_keys, eq_cond_len,
+ ha_thd(), kd, eq_cond, use_all_keys,
+ std::max(eq_cond_len, (uint)Rdb_key_def::INDEX_NUMBER_SIZE),
m_scan_it_lower_bound, m_scan_it_upper_bound,
&m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice)) {
skip_bloom = false;
@@ -10259,7 +10427,8 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
m_scan_it_snapshot = rdb->GetSnapshot();
auto read_opts = rocksdb::ReadOptions();
- read_opts.total_order_seek = true; // TODO: set based on WHERE conditions
+ // TODO(mung): set based on WHERE conditions
+ read_opts.total_order_seek = true;
read_opts.snapshot = m_scan_it_snapshot;
m_scan_it = rdb->NewIterator(read_opts, kd.get_cf());
} else {
@@ -10284,12 +10453,12 @@ void ha_rocksdb::release_scan_iterator() {
void ha_rocksdb::setup_iterator_for_rnd_scan() {
uint key_size;
- int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
+ int key_start_matching_bytes =
+ m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
- setup_scan_iterator(*m_pk_descr, &table_key, false,
- key_start_matching_bytes);
+ setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes);
m_scan_it->Seek(table_key);
m_skip_scan_it_next_call = true;
}
@@ -10302,8 +10471,16 @@ void ha_rocksdb::setup_iterator_for_rnd_scan() {
int ha_rocksdb::rnd_init(bool scan) {
DBUG_ENTER_FUNC();
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
- setup_read_decoders();
+
+ // when this table is being updated, decode all fields
+ m_converter->setup_field_decoders(table->read_set,
+ m_lock_rows == RDB_LOCK_WRITE);
if (scan) {
m_rnd_scan_is_new_snapshot = !tx->has_snapshot();
@@ -10331,9 +10508,9 @@ int ha_rocksdb::rnd_next(uchar *const buf) {
int rc;
for (;;) {
rc = rnd_next_with_direction(buf, true);
- if (rc != HA_ERR_ROCKSDB_STATUS_BUSY || !m_rnd_scan_is_new_snapshot)
+ if (!should_recreate_snapshot(rc, m_rnd_scan_is_new_snapshot)) {
break; /* exit the loop */
-
+ }
// release the snapshot and iterator and then regenerate them
Rdb_transaction *tx = get_or_create_tx(table->in_use);
tx->release_snapshot();
@@ -10343,8 +10520,7 @@ int ha_rocksdb::rnd_next(uchar *const buf) {
m_rnd_scan_is_new_snapshot = false;
- if (rc == HA_ERR_KEY_NOT_FOUND)
- rc = HA_ERR_END_OF_FILE;
+ if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
DBUG_RETURN(rc);
}
@@ -10356,6 +10532,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
DBUG_ENTER_FUNC();
int rc;
+ THD *thd = ha_thd();
table->status = STATUS_NOT_FOUND;
#ifdef MARIAROCKS_NOT_YET
@@ -10374,13 +10551,20 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
}
for (;;) {
+ DEBUG_SYNC(thd, "rocksdb.check_flags_rnwd");
+ if (thd && thd->killed) {
+ rc = HA_ERR_QUERY_INTERRUPTED;
+ break;
+ }
+
if (m_skip_scan_it_next_call) {
m_skip_scan_it_next_call = false;
} else {
- if (move_forward)
+ if (move_forward) {
m_scan_it->Next(); /* this call cannot fail */
- else
+ } else {
m_scan_it->Prev(); /* this call cannot fail */
+ }
}
if (!is_valid(m_scan_it)) {
@@ -10414,11 +10598,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
const rocksdb::Status s =
get_for_update(tx, m_pk_descr->get_cf(), key, &m_retrieved_record);
if (s.IsNotFound() &&
- my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED) {
- // This occurs if we accessed a row, tried to lock it, failed,
- // released and reacquired the snapshot (because of READ COMMITTED
- // mode) and the row was deleted by someone else in the meantime.
- // If so, we just want to move on to the next row.
+ should_skip_invalidated_record(HA_ERR_KEY_NOT_FOUND)) {
continue;
}
@@ -10478,11 +10658,18 @@ int ha_rocksdb::rnd_end() {
int ha_rocksdb::index_init(uint idx, bool sorted) {
DBUG_ENTER_FUNC();
+ THD *thd = ha_thd();
+ if (thd && thd->killed) {
+ DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
+ }
+
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
DBUG_ASSERT(tx != nullptr);
- setup_read_decoders();
- m_start_range= NULL;
+ m_start_range= NULL;
+ // when this table is being updated, decode all fields
+ m_converter->setup_field_decoders(table->read_set,
+ m_lock_rows == RDB_LOCK_WRITE);
if (!m_keyread_only) {
m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap);
@@ -10526,10 +10713,20 @@ int ha_rocksdb::truncate() {
DBUG_ASSERT(m_tbl_def != nullptr);
- m_tbl_def->m_auto_incr_val = 1;
- m_tbl_def->m_hidden_pk_val = 1;
+ // Save table name to use later
+ std::string table_name = m_tbl_def->full_tablename();
+
+ // Delete current table
+ int err = delete_table(m_tbl_def);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
+ }
- DBUG_RETURN(remove_rows(m_tbl_def));
+ // Reset auto_increment_value to 1 if auto-increment feature is enabled
+ // By default, the starting valid value for auto_increment_value is 1
+ DBUG_RETURN(create_table(
+ table_name, table,
+ table->found_next_number_field ? 1 : 0 /* auto_increment_value */));
}
/*
@@ -10607,9 +10804,11 @@ rocksdb::Status ha_rocksdb::delete_or_singledelete(
uint index, Rdb_transaction *const tx,
rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key) {
- if (can_use_single_delete(index))
- return tx->single_delete(column_family, key);
- return tx->delete_key(column_family, key);
+ const bool assume_tracked = can_assume_tracked(ha_thd());
+ if (can_use_single_delete(index)) {
+ return tx->single_delete(column_family, key, assume_tracked);
+ }
+ return tx->delete_key(column_family, key, assume_tracked);
}
void ha_rocksdb::update_stats(void) {
@@ -10640,8 +10839,9 @@ void ha_rocksdb::update_stats(void) {
int ha_rocksdb::info(uint flag) {
DBUG_ENTER_FUNC();
- if (!table)
+ if (!table) {
DBUG_RETURN(HA_EXIT_FAILURE);
+ }
if (flag & HA_STATUS_VARIABLE) {
/*
@@ -10669,10 +10869,8 @@ int ha_rocksdb::info(uint flag) {
// if number of records is hardcoded, we do not want to force computation
// of memtable cardinalities
- if (stats.records == 0 ||
- (rocksdb_force_compute_memtable_stats &&
- rocksdb_debug_optimizer_n_rows == 0))
- {
+ if (stats.records == 0 || (rocksdb_force_compute_memtable_stats &&
+ rocksdb_debug_optimizer_n_rows == 0)) {
// First, compute SST files stats
uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
auto r = get_range(pk_index(table, m_tbl_def), buf);
@@ -10682,8 +10880,8 @@ int ha_rocksdb::info(uint flag) {
if (stats.records == 0) {
rdb->GetApproximateSizes(m_pk_descr->get_cf(), &r, 1, &sz,
include_flags);
- stats.records+= sz/ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE;
- stats.data_file_length+= sz;
+ stats.records += sz / ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE;
+ stats.data_file_length += sz;
}
// Second, compute memtable stats. This call is expensive, so cache
// values computed for some time.
@@ -10720,14 +10918,17 @@ int ha_rocksdb::info(uint flag) {
stats.records += m_table_handler->m_mtcache_count;
stats.data_file_length += m_table_handler->m_mtcache_size;
}
+ }
- if (rocksdb_debug_optimizer_n_rows > 0)
- stats.records = rocksdb_debug_optimizer_n_rows;
+ if (rocksdb_debug_optimizer_n_rows > 0) {
+ stats.records = rocksdb_debug_optimizer_n_rows;
}
- if (stats.records != 0)
+ if (stats.records != 0) {
stats.mean_rec_length = stats.data_file_length / stats.records;
+ }
}
+
if (flag & HA_STATUS_CONST) {
ref_length = m_pk_descr->max_storage_fmt_length();
@@ -10753,8 +10954,7 @@ int ha_rocksdb::info(uint flag) {
} else {
x = 0;
}
- if (x > stats.records)
- x = stats.records;
+ if (x > stats.records) x = stats.records;
if ((x == 0 && rocksdb_debug_optimizer_no_zero_cardinality) ||
rocksdb_debug_optimizer_n_rows > 0) {
// Fake cardinality implementation. For example, (idx1, idx2, idx3)
@@ -10789,7 +10989,7 @@ int ha_rocksdb::info(uint flag) {
uniqueness violation.
*/
errkey = m_dupp_errkey;
- dup_ref = m_pk_tuple; // TODO(?): this should store packed PK.
+ dup_ref = m_pk_tuple; // TODO(?): this should store packed PK.
}
if (flag & HA_STATUS_AUTO) {
@@ -10803,8 +11003,9 @@ void ha_rocksdb::position(const uchar *const record) {
DBUG_ENTER_FUNC();
longlong hidden_pk_id = 0;
- if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id))
- DBUG_ASSERT(false); // should never reach here
+ if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) {
+ DBUG_ASSERT(false); // should never reach here
+ }
/*
Get packed primary key value from the record.
@@ -10826,8 +11027,9 @@ void ha_rocksdb::position(const uchar *const record) {
It could be that mem-comparable form of PK occupies less than ref_length
bytes. Fill the remainder with zeros.
*/
- if (ref_length > packed_size)
+ if (ref_length > packed_size) {
memset(ref + packed_size, 0, ref_length - packed_size);
+ }
DBUG_VOID_RETURN;
}
@@ -10885,8 +11087,7 @@ void ha_rocksdb::calc_updated_indexes() {
/* Walk over all key parts, including the "extended key" suffix */
const uint key_parts = kd.get_key_parts();
for (uint kp = 0; kp < key_parts; kp++) {
- if (has_hidden_pk(table) && kp + 1 == key_parts)
- break;
+ if (has_hidden_pk(table) && kp + 1 == key_parts) break;
Field *const field = kd.get_table_field_for_part_no(table, kp);
if (bitmap_is_set(table->write_set, field->field_index)) {
@@ -10899,6 +11100,9 @@ void ha_rocksdb::calc_updated_indexes() {
}
/**
+ Update an existing row
+ @param[in] old_data nullptr for write, non-null for update
+ @param[in] new_data non-null for write/update
@return
HA_EXIT_SUCCESS OK
other HA_ERR error code (can be SE-specific)
@@ -10916,7 +11120,7 @@ int ha_rocksdb::update_row(const uchar *const old_data, uchar *const new_data) {
*/
DBUG_ASSERT(new_data == table->record[0]);
- const int rv = update_write_row(old_data, new_data, false);
+ const int rv = update_write_row(old_data, new_data, skip_unique_check());
if (rv == 0) {
#ifdef MARIAROCKS_NOT_YET
@@ -11021,7 +11225,8 @@ THR_LOCK_DATA **ha_rocksdb::store_lock(THD *const thd, THR_LOCK_DATA **to,
void ha_rocksdb::read_thd_vars(THD *const thd) {
m_store_row_debug_checksums = THDVAR(thd, store_row_debug_checksums);
- m_verify_row_debug_checksums = THDVAR(thd, verify_row_debug_checksums);
+ m_converter->set_verify_row_debug_checksums(
+ THDVAR(thd, verify_row_debug_checksums));
m_checksums_pct = THDVAR(thd, checksums_pct);
}
@@ -11112,7 +11317,8 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) {
tx->io_perf_end_and_record(&m_io_perf);
tx->m_n_mysql_tables_in_use--;
if (tx->m_n_mysql_tables_in_use == 0 &&
- !my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
+ !my_core::thd_test_options(thd,
+ OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
/*
Do like InnoDB: when we get here, it's time to commit a
single-statement transaction.
@@ -11244,9 +11450,8 @@ rocksdb::Range get_range(const Rdb_key_def &kd,
}
}
-rocksdb::Range
-ha_rocksdb::get_range(const int &i,
- uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const {
+rocksdb::Range ha_rocksdb::get_range(
+ const int i, uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const {
return myrocks::get_range(*m_key_descr_arr[i], buf);
}
@@ -11257,11 +11462,10 @@ ha_rocksdb::get_range(const int &i,
but in drop_index_thread's case, it means index is marked as removed,
so no further seek will happen for the index id.
*/
-static bool is_myrocks_index_empty(
- rocksdb::ColumnFamilyHandle *cfh, const bool is_reverse_cf,
- const rocksdb::ReadOptions &read_opts,
- const uint index_id)
-{
+static bool is_myrocks_index_empty(rocksdb::ColumnFamilyHandle *cfh,
+ const bool is_reverse_cf,
+ const rocksdb::ReadOptions &read_opts,
+ const uint index_id) {
bool index_removed = false;
uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0};
rdb_netbuf_store_uint32(key_buf, index_id);
@@ -11272,8 +11476,7 @@ static bool is_myrocks_index_empty(
if (!it->Valid()) {
index_removed = true;
} else {
- if (memcmp(it->key().data(), key_buf,
- Rdb_key_def::INDEX_NUMBER_SIZE)) {
+ if (memcmp(it->key().data(), key_buf, Rdb_key_def::INDEX_NUMBER_SIZE)) {
// Key does not have same prefix
index_removed = true;
}
@@ -11300,8 +11503,8 @@ void Rdb_drop_index_thread::run() {
timespec ts;
int sec= dict_manager.is_drop_index_empty()
- ? 24 * 60 * 60 // no filtering
- : 60; // filtering
+ ? 24 * 60 * 60 // no filtering
+ : 60; // filtering
set_timespec(ts,sec);
const auto ret MY_ATTRIBUTE((__unused__)) =
@@ -11318,26 +11521,23 @@ void Rdb_drop_index_thread::run() {
if (!indices.empty()) {
std::unordered_set<GL_INDEX_ID> finished;
rocksdb::ReadOptions read_opts;
- read_opts.total_order_seek = true; // disable bloom filter
+ read_opts.total_order_seek = true; // disable bloom filter
for (const auto d : indices) {
uint32 cf_flags = 0;
if (!dict_manager.get_cf_flags(d.cf_id, &cf_flags)) {
- sql_print_error("RocksDB: Failed to get column family flags "
- "from cf id %u. MyRocks data dictionary may "
- "get corrupted.",
- d.cf_id);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Failed to get column family flags "
+ "from cf id %u. MyRocks data dictionary may "
+ "get corrupted.",
+ d.cf_id);
abort();
}
rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id);
DBUG_ASSERT(cfh);
const bool is_reverse_cf = cf_flags & Rdb_key_def::REVERSE_CF_FLAG;
- if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id))
- {
- finished.insert(d);
- continue;
- }
uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0,
is_reverse_cf ? 0 : 1);
@@ -11357,8 +11557,7 @@ void Rdb_drop_index_thread::run() {
}
rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD);
}
- if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id))
- {
+ if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) {
finished.insert(d);
}
}
@@ -11385,28 +11584,24 @@ Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) {
}
/*
- Note: the following function is called when the table is not open. That is,
- this->table==nullptr, pk_key_descr==nullptr, etc.
+ Overload func for delete table ---it deletes table meta data in data
+ dictionary immediately and delete real data in background thread(async)
- tablename points to line in form "./dbname/tablename".
+ @param tbl IN MyRocks table definition
@return
HA_EXIT_SUCCESS OK
other HA_ERR error code (can be SE-specific)
*/
-
-int ha_rocksdb::delete_table(const char *const tablename) {
+int ha_rocksdb::delete_table(Rdb_tbl_def *const tbl) {
DBUG_ENTER_FUNC();
- DBUG_ASSERT(tablename != nullptr);
+ DBUG_ASSERT(tbl != nullptr);
+ DBUG_ASSERT(m_tbl_def == nullptr || m_tbl_def == tbl);
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
- /* Find the table in the hash */
- Rdb_tbl_def *const tbl = get_table_if_exists(tablename);
- if (!tbl)
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
dict_manager.add_drop_table(tbl->m_key_descr_arr, tbl->m_key_count, batch);
/*
@@ -11421,10 +11616,36 @@ int ha_rocksdb::delete_table(const char *const tablename) {
}
rdb_drop_idx_thread.signal();
-
+ // avoid dangling pointer
+ m_tbl_def = nullptr;
DBUG_RETURN(HA_EXIT_SUCCESS);
}
+/*
+ Note: the following function is called when the table is not open. That is,
+ this->table==nullptr, pk_key_descr==nullptr, etc.
+
+ tablename points to line in form "./dbname/tablename".
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+
+int ha_rocksdb::delete_table(const char *const tablename) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(tablename != nullptr);
+
+ /* Find the table in the hash */
+ Rdb_tbl_def *const tbl = get_table_if_exists(tablename);
+ if (!tbl) {
+ DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
+ }
+
+ DBUG_RETURN(delete_table(tbl));
+}
+
int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
const rocksdb::WriteOptions wo =
rdb_get_rocksdb_write_options(handler::ha_thd());
@@ -11575,21 +11796,33 @@ int ha_rocksdb::extra(enum ha_extra_function operation) {
DBUG_ENTER_FUNC();
switch (operation) {
- case HA_EXTRA_KEYREAD:
- m_keyread_only = true;
- break;
- case HA_EXTRA_NO_KEYREAD:
- m_keyread_only = false;
- break;
- case HA_EXTRA_FLUSH:
- /*
- If the table has blobs, then they are part of m_retrieved_record.
- This call invalidates them.
- */
- m_retrieved_record.Reset();
- break;
- default:
- break;
+ case HA_EXTRA_KEYREAD:
+ m_keyread_only = true;
+ break;
+ case HA_EXTRA_NO_KEYREAD:
+ m_keyread_only = false;
+ break;
+ case HA_EXTRA_FLUSH:
+ /*
+ If the table has blobs, then they are part of m_retrieved_record.
+ This call invalidates them.
+ */
+ m_retrieved_record.Reset();
+ break;
+ case HA_EXTRA_INSERT_WITH_UPDATE:
+ // INSERT ON DUPLICATE KEY UPDATE
+ if (rocksdb_enable_insert_with_update_caching) {
+ m_insert_with_update = true;
+ }
+ break;
+ case HA_EXTRA_NO_IGNORE_DUP_KEY:
+ // PAIRED with HA_EXTRA_INSERT_WITH_UPDATE or HA_EXTRA_WRITE_CAN_REPLACE
+ // that indicates the end of REPLACE / INSERT ON DUPLICATE KEY
+ m_insert_with_update = false;
+ break;
+
+ default:
+ break;
}
DBUG_RETURN(HA_EXIT_SUCCESS);
@@ -11660,8 +11893,7 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range *const min_key,
uint64_t sz = 0;
auto disk_size = kd.m_stats.m_actual_disk_size;
- if (disk_size == 0)
- disk_size = kd.m_stats.m_data_size;
+ if (disk_size == 0) disk_size = kd.m_stats.m_data_size;
auto rows = kd.m_stats.m_rows;
if (rows == 0 || disk_size == 0) {
rows = 1;
@@ -11772,7 +12004,7 @@ static int calculate_stats(
// get RocksDB table properties for these ranges
rocksdb::TablePropertiesCollection props;
- for (auto it : ranges) {
+ for (const auto &it : ranges) {
const auto old_size MY_ATTRIBUTE((__unused__)) = props.size();
const auto status = rdb->GetPropertiesOfTablesInRange(
it.first, &it.second[0], it.second.size(), &props);
@@ -11962,73 +12194,77 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
// The next value can be more complicated if either 'inc' or 'off' is not 1
ulonglong last_val = auto_incr;
- // Loop until we can correctly update the atomic value
- do {
- DBUG_ASSERT(last_val > 0);
- // Calculate the next value in the auto increment series: offset
- // + N * increment where N is 0, 1, 2, ...
- //
- // For further information please visit:
- // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
- //
- // The following is confusing so here is an explanation:
- // To get the next number in the sequence above you subtract out the
- // offset, calculate the next sequence (N * increment) and then add the
- // offset back in.
- //
- // The additions are rearranged to avoid overflow. The following is
- // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact
- // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why:
- //
- // (a+b)/c
- // = (a - a%c + a%c + b - b%c + b%c) / c
- // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c
- // = a/c + b/c + (a%c + b%c) / c
- //
- // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the
- // following statement.
- ulonglong n =
- (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc;
-
- // Check if n * inc + off will overflow. This can only happen if we have
- // an UNSIGNED BIGINT field.
- if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) {
- DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max());
- // The 'last_val' value is already equal to or larger than the largest
- // value in the sequence. Continuing would wrap around (technically
- // the behavior would be undefined). What should we do?
- // We could:
- // 1) set the new value to the last possible number in our sequence
- // as described above. The problem with this is that this
- // number could be smaller than a value in an existing row.
- // 2) set the new value to the largest possible number. This number
- // may not be in our sequence, but it is guaranteed to be equal
- // to or larger than any other value already inserted.
+ if (last_val > max_val) {
+ new_val = std::numeric_limits<ulonglong>::max();
+ } else {
+ // Loop until we can correctly update the atomic value
+ do {
+ DBUG_ASSERT(last_val > 0);
+ // Calculate the next value in the auto increment series: offset
+ // + N * increment where N is 0, 1, 2, ...
//
- // For now I'm going to take option 2.
+ // For further information please visit:
+ // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
//
- // Returning ULLONG_MAX from get_auto_increment will cause the SQL
- // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to
- // the SE API for get_auto_increment, inserts will fail with
- // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but
- // inserts will fail with ER_DUP_ENTRY for other types (or no failure
- // if the column is in a non-unique SK).
- new_val = std::numeric_limits<ulonglong>::max();
- auto_incr = new_val; // Store the largest value into auto_incr
- break;
- }
+ // The following is confusing so here is an explanation:
+ // To get the next number in the sequence above you subtract out the
+ // offset, calculate the next sequence (N * increment) and then add the
+ // offset back in.
+ //
+ // The additions are rearranged to avoid overflow. The following is
+ // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact
+ // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why:
+ //
+ // (a+b)/c
+ // = (a - a%c + a%c + b - b%c + b%c) / c
+ // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c
+ // = a/c + b/c + (a%c + b%c) / c
+ //
+ // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the
+ // following statement.
+ ulonglong n =
+ (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc;
+
+ // Check if n * inc + off will overflow. This can only happen if we have
+ // an UNSIGNED BIGINT field.
+ if (n > (std::numeric_limits<ulonglong>::max() - off) / inc) {
+ DBUG_ASSERT(max_val == std::numeric_limits<ulonglong>::max());
+ // The 'last_val' value is already equal to or larger than the largest
+ // value in the sequence. Continuing would wrap around (technically
+ // the behavior would be undefined). What should we do?
+ // We could:
+ // 1) set the new value to the last possible number in our sequence
+ // as described above. The problem with this is that this
+ // number could be smaller than a value in an existing row.
+ // 2) set the new value to the largest possible number. This number
+ // may not be in our sequence, but it is guaranteed to be equal
+ // to or larger than any other value already inserted.
+ //
+ // For now I'm going to take option 2.
+ //
+ // Returning ULLONG_MAX from get_auto_increment will cause the SQL
+ // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to
+ // the SE API for get_auto_increment, inserts will fail with
+ // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but
+ // inserts will fail with ER_DUP_ENTRY for other types (or no failure
+ // if the column is in a non-unique SK).
+ new_val = std::numeric_limits<ulonglong>::max();
+ auto_incr = new_val; // Store the largest value into auto_incr
+ break;
+ }
- new_val = n * inc + off;
+ new_val = n * inc + off;
- // Attempt to store the new value (plus 1 since m_auto_incr_val contains
- // the next available value) into the atomic value. If the current
- // value no longer matches what we have in 'last_val' this will fail and
- // we will repeat the loop (`last_val` will automatically get updated
- // with the current value).
- //
- // See above explanation for inc == 1 for why we use std::min.
- } while (!auto_incr.compare_exchange_weak(last_val,
- std::min(new_val + 1, max_val)));
+ // Attempt to store the new value (plus 1 since m_auto_incr_val contains
+ // the next available value) into the atomic value. If the current
+ // value no longer matches what we have in 'last_val' this will fail and
+ // we will repeat the loop (`last_val` will automatically get updated
+ // with the current value).
+ //
+ // See above explanation for inc == 1 for why we use std::min.
+ } while (!auto_incr.compare_exchange_weak(
+ last_val, std::min(new_val + 1, max_val)));
+ }
}
*first_value = new_val;
@@ -12044,13 +12280,13 @@ const char *dbug_print_item(Item *const item) {
char *const buf = dbug_item_print_buf;
String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin);
str.length(0);
- if (!item)
- return "(Item*)nullptr";
+ if (!item) return "(Item*)nullptr";
item->print(&str, QT_ORDINARY);
- if (str.c_ptr() == buf)
+ if (str.c_ptr() == buf) {
return buf;
- else
+ } else {
return "Couldn't fit into buffer";
+ }
}
#endif /*DBUG_OFF*/
@@ -12179,7 +12415,6 @@ bool ha_rocksdb::prepare_inplace_alter_table(
my_core::Alter_inplace_info::DROP_UNIQUE_INDEX |
my_core::Alter_inplace_info::ADD_INDEX |
my_core::Alter_inplace_info::ADD_UNIQUE_INDEX)) {
-
if (has_hidden_pk(altered_table)) {
new_n_keys += 1;
}
@@ -12466,7 +12701,7 @@ int ha_rocksdb::inplace_populate_sk(
const int new_packed_size = index->pack_record(
new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple,
&m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0,
- nullptr, nullptr, m_ttl_bytes);
+ nullptr, m_ttl_bytes);
const rocksdb::Slice key = rocksdb::Slice(
reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
@@ -12514,9 +12749,9 @@ int ha_rocksdb::inplace_populate_sk(
is used inside print_keydup_error so that the error message shows
the duplicate record.
*/
- if (index->unpack_record(new_table_arg, new_table_arg->record[0],
- &merge_key, nullptr,
- m_verify_row_debug_checksums)) {
+ if (index->unpack_record(
+ new_table_arg, new_table_arg->record[0], &merge_key,
+ &merge_val, m_converter->get_verify_row_debug_checksums())) {
/* Should never reach here */
DBUG_ASSERT(0);
}
@@ -12546,7 +12781,9 @@ int ha_rocksdb::inplace_populate_sk(
DBUG_RETURN(res);
}
- if ((res = tx->finish_bulk_load())) {
+ bool is_critical_error;
+ res = tx->finish_bulk_load(&is_critical_error);
+ if (res && is_critical_error) {
// NO_LINT_DEBUG
sql_print_error("Error finishing bulk load.");
DBUG_RETURN(res);
@@ -12756,22 +12993,22 @@ bool ha_rocksdb::commit_inplace_alter_table(
#define SHOW_FNAME(name) rocksdb_show_##name
-#define DEF_SHOW_FUNC(name, key) \
- static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \
- rocksdb_status_counters.name = \
- rocksdb_stats->getTickerCount(rocksdb::key); \
- var->type = SHOW_LONGLONG; \
- var->value = (char *)&rocksdb_status_counters.name; \
- return HA_EXIT_SUCCESS; \
+#define DEF_SHOW_FUNC(name, key) \
+ static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \
+ rocksdb_status_counters.name = \
+ rocksdb_stats->getTickerCount(rocksdb::key); \
+ var->type = SHOW_LONGLONG; \
+ var->value = reinterpret_cast<char *>(&rocksdb_status_counters.name); \
+ return HA_EXIT_SUCCESS; \
}
-#define DEF_STATUS_VAR(name) \
+#define DEF_STATUS_VAR(name) \
{ "rocksdb_" #name, (char *)&SHOW_FNAME(name), SHOW_FUNC }
-#define DEF_STATUS_VAR_PTR(name, ptr, option) \
+#define DEF_STATUS_VAR_PTR(name, ptr, option) \
{ "rocksdb_" name, (char *)ptr, option }
-#define DEF_STATUS_VAR_FUNC(name, ptr, option) \
+#define DEF_STATUS_VAR_FUNC(name, ptr, option) \
{ name, reinterpret_cast<char *>(ptr), option }
struct rocksdb_status_counters_t {
@@ -13001,9 +13238,8 @@ static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) {
var->value = reinterpret_cast<char *>(&myrocks_status_variables);
}
-static ulonglong
-io_stall_prop_value(const std::map<std::string, std::string> &props,
- const std::string &key) {
+static ulonglong io_stall_prop_value(
+ const std::map<std::string, std::string> &props, const std::string &key) {
std::map<std::string, std::string>::const_iterator iter =
props.find("io_stalls." + key);
if (iter != props.end()) {
@@ -13181,6 +13417,10 @@ static SHOW_VAR rocksdb_status_vars[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other,
SHOW_LONGLONG),
+#ifndef DBUG_OFF
+ DEF_STATUS_VAR_PTR("num_get_for_update_calls",
+ &rocksdb_num_get_for_update_calls, SHOW_LONGLONG),
+#endif
// the variables generated by SHOW_FUNC are sorted only by prefix (first
// arg in the tuple below), so make sure it is unique to make sorting
// deterministic as quick sort is not stable
@@ -13422,6 +13662,49 @@ bool Rdb_manual_compaction_thread::is_manual_compaction_finished(int mc_id) {
return finished;
}
+/**
+ * Locking read + Not Found + Read Committed occurs if we accessed
+ * a row by Seek, tried to lock it, failed, released and reacquired the
+ * snapshot (because of READ COMMITTED mode) and the row was deleted by
+ * someone else in the meantime.
+ * If so, we either just skipping the row, or re-creating a snapshot
+ * and seek again. In both cases, Read Committed constraint is not broken.
+ */
+bool ha_rocksdb::should_skip_invalidated_record(const int rc) {
+ if ((m_lock_rows != RDB_LOCK_NONE && rc == HA_ERR_KEY_NOT_FOUND &&
+ my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED)) {
+ return true;
+ }
+ return false;
+}
+/**
+ * Indicating snapshot needs to be re-created and retrying seek again,
+ * instead of returning errors or empty set. This is normally applicable
+ * when hitting kBusy when locking the first row of the transaction,
+ * with Repeatable Read isolation level.
+ */
+bool ha_rocksdb::should_recreate_snapshot(const int rc,
+ const bool is_new_snapshot) {
+ if (should_skip_invalidated_record(rc) ||
+ (rc == HA_ERR_ROCKSDB_STATUS_BUSY && is_new_snapshot)) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * If calling put/delete/singledelete without locking the row,
+ * it is necessary to pass assume_tracked=false to RocksDB TX API.
+ * Read Free Replication and Blind Deletes are the cases when
+ * using TX API and skipping row locking.
+ */
+bool ha_rocksdb::can_assume_tracked(THD *thd) {
+ if (/* MARIAROCKS_NOT_YET use_read_free_rpl() ||*/ (THDVAR(thd, blind_delete_primary_key))) {
+ return false;
+ }
+ return true;
+}
+
bool ha_rocksdb::check_bloom_and_set_bounds(
THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond,
const bool use_all_keys, size_t bound_len, uchar *const lower_bound,
@@ -13482,20 +13765,22 @@ bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
shorter require all parts of the key to be available
for the short key match.
*/
- if ((use_all_keys && prefix_extractor->InRange(eq_cond))
- || prefix_extractor->SameResultWhenAppended(eq_cond))
+ if ((use_all_keys && prefix_extractor->InRange(eq_cond)) ||
+ prefix_extractor->SameResultWhenAppended(eq_cond)) {
can_use = true;
- else
+ } else {
can_use = false;
+ }
} else {
/*
if prefix extractor is not defined, all key parts have to be
used by eq_cond.
*/
- if (use_all_keys)
+ if (use_all_keys) {
can_use = true;
- else
+ } else {
can_use = false;
+ }
}
return can_use;
@@ -13514,7 +13799,7 @@ bool rdb_is_ttl_enabled() { return rocksdb_enable_ttl; }
bool rdb_is_ttl_read_filtering_enabled() {
return rocksdb_enable_ttl_read_filtering;
}
-#ifndef NDEBUG
+#ifndef DBUG_OFF
int rdb_dbug_set_ttl_rec_ts() { return rocksdb_debug_ttl_rec_ts; }
int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; }
int rdb_dbug_set_ttl_read_filter_ts() {
@@ -13561,17 +13846,17 @@ const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) {
static_assert(RDB_IO_ERROR_LAST == 4, "Please handle all the error types.");
switch (err_type) {
- case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT:
- return "RDB_IO_ERROR_TX_COMMIT";
- case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT:
- return "RDB_IO_ERROR_DICT_COMMIT";
- case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD:
- return "RDB_IO_ERROR_BG_THREAD";
- case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL:
- return "RDB_IO_ERROR_GENERAL";
- default:
- DBUG_ASSERT(false);
- return "(unknown)";
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT:
+ return "RDB_IO_ERROR_TX_COMMIT";
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT:
+ return "RDB_IO_ERROR_DICT_COMMIT";
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD:
+ return "RDB_IO_ERROR_BG_THREAD";
+ case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL:
+ return "RDB_IO_ERROR_GENERAL";
+ default:
+ DBUG_ASSERT(false);
+ return "(unknown)";
}
}
@@ -13583,32 +13868,38 @@ const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) {
void rdb_handle_io_error(const rocksdb::Status status,
const RDB_IO_ERROR_TYPE err_type) {
if (status.IsIOError()) {
- switch (err_type) {
- case RDB_IO_ERROR_TX_COMMIT:
- case RDB_IO_ERROR_DICT_COMMIT: {
- rdb_log_status_error(status, "failed to write to WAL");
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: aborting on WAL write error.");
- abort();
- break;
- }
- case RDB_IO_ERROR_BG_THREAD: {
- rdb_log_status_error(status, "BG thread failed to write to RocksDB");
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: aborting on BG write error.");
- abort();
- break;
- }
- case RDB_IO_ERROR_GENERAL: {
- rdb_log_status_error(status, "failed on I/O");
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: aborting on I/O error.");
- abort();
- break;
+ /* skip dumping core if write failed and we are allowed to do so */
+#ifdef MARIAROCKS_NOT_YET
+ if (skip_core_dump_on_error) {
+ opt_core_file = false;
}
- default:
- DBUG_ASSERT(0);
- break;
+#endif
+ switch (err_type) {
+ case RDB_IO_ERROR_TX_COMMIT:
+ case RDB_IO_ERROR_DICT_COMMIT: {
+ rdb_log_status_error(status, "failed to write to WAL");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on WAL write error.");
+ abort();
+ break;
+ }
+ case RDB_IO_ERROR_BG_THREAD: {
+ rdb_log_status_error(status, "BG thread failed to write to RocksDB");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on BG write error.");
+ abort();
+ break;
+ }
+ case RDB_IO_ERROR_GENERAL: {
+ rdb_log_status_error(status, "failed on I/O");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on I/O error.");
+ abort();
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ break;
}
} else if (status.IsCorruption()) {
rdb_log_status_error(status, "data corruption detected!");
@@ -13618,16 +13909,16 @@ void rdb_handle_io_error(const rocksdb::Status status,
abort();
} else if (!status.ok()) {
switch (err_type) {
- case RDB_IO_ERROR_DICT_COMMIT: {
- rdb_log_status_error(status, "Failed to write to WAL (dictionary)");
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: aborting on WAL write error.");
- abort();
- break;
- }
- default:
- rdb_log_status_error(status, "Failed to read/write in RocksDB");
- break;
+ case RDB_IO_ERROR_DICT_COMMIT: {
+ rdb_log_status_error(status, "Failed to write to WAL (dictionary)");
+ /* NO_LINT_DEBUG */
+ sql_print_error("MyRocks: aborting on WAL write error.");
+ abort();
+ break;
+ }
+ default:
+ rdb_log_status_error(status, "Failed to read/write in RocksDB");
+ break;
}
}
}
@@ -13735,9 +14026,10 @@ void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var,
if (!s.ok()) {
/* NO_LINT_DEBUG */
- sql_print_warning("MyRocks: failed to update delayed_write_rate. "
- "status code = %d, status = %s",
- s.code(), s.ToString().c_str());
+ sql_print_warning(
+ "MyRocks: failed to update delayed_write_rate. "
+ "status code = %d, status = %s",
+ s.code(), s.ToString().c_str());
}
}
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
@@ -13795,8 +14087,7 @@ int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) {
} else if (new_value_type == MYSQL_VALUE_TYPE_INT) {
long long intbuf;
value->val_int(value, &intbuf);
- if (intbuf > 1)
- return 1;
+ if (intbuf > 1) return 1;
*return_value = intbuf > 0 ? TRUE : FALSE;
} else {
return 1;
@@ -13815,12 +14106,14 @@ int rocksdb_check_bulk_load(
Rdb_transaction *tx = get_tx_from_thd(thd);
if (tx != nullptr) {
- const int rc = tx->finish_bulk_load();
- if (rc != 0) {
+ bool is_critical_error;
+ const int rc = tx->finish_bulk_load(&is_critical_error);
+ if (rc != 0 && is_critical_error) {
// NO_LINT_DEBUG
- sql_print_error("RocksDB: Error %d finalizing last SST file while "
- "setting bulk loading variable",
- rc);
+ sql_print_error(
+ "RocksDB: Error %d finalizing last SST file while "
+ "setting bulk loading variable",
+ rc);
THDVAR(thd, bulk_load) = 0;
return 1;
}
@@ -13868,9 +14161,10 @@ static void rocksdb_set_max_background_jobs(THD *thd,
if (!s.ok()) {
/* NO_LINT_DEBUG */
- sql_print_warning("MyRocks: failed to update max_background_jobs. "
- "Status code = %d, status = %s.",
- s.code(), s.ToString().c_str());
+ sql_print_warning(
+ "MyRocks: failed to update max_background_jobs. "
+ "Status code = %d, status = %s.",
+ s.code(), s.ToString().c_str());
}
}
@@ -13896,9 +14190,10 @@ static void rocksdb_set_bytes_per_sync(
if (!s.ok()) {
/* NO_LINT_DEBUG */
- sql_print_warning("MyRocks: failed to update max_background_jobs. "
- "Status code = %d, status = %s.",
- s.code(), s.ToString().c_str());
+ sql_print_warning(
+ "MyRocks: failed to update max_background_jobs. "
+ "Status code = %d, status = %s.",
+ s.code(), s.ToString().c_str());
}
}
@@ -13924,9 +14219,10 @@ static void rocksdb_set_wal_bytes_per_sync(
if (!s.ok()) {
/* NO_LINT_DEBUG */
- sql_print_warning("MyRocks: failed to update max_background_jobs. "
- "Status code = %d, status = %s.",
- s.code(), s.ToString().c_str());
+ sql_print_warning(
+        "MyRocks: failed to update wal_bytes_per_sync. "
+ "Status code = %d, status = %s.",
+ s.code(), s.ToString().c_str());
}
}
@@ -13953,7 +14249,7 @@ static int rocksdb_validate_set_block_cache_size(
}
if (new_value < RDB_MIN_BLOCK_CACHE_SIZE ||
- (uint64_t)new_value > (uint64_t)LONGLONG_MAX) {
+ (uint64_t)new_value > (uint64_t)LLONG_MAX) {
return HA_EXIT_FAILURE;
}
@@ -13969,17 +14265,19 @@ static int rocksdb_validate_set_block_cache_size(
return HA_EXIT_SUCCESS;
}
-static int
-rocksdb_validate_update_cf_options(THD * /* unused */,
- struct st_mysql_sys_var * /*unused*/,
- void *save, struct st_mysql_value *value) {
-
+static int rocksdb_validate_update_cf_options(
+ THD * /* unused */, struct st_mysql_sys_var * /*unused*/, void *save,
+ struct st_mysql_value *value) {
char buff[STRING_BUFFER_USUAL_SIZE];
const char *str;
int length;
length = sizeof(buff);
str = value->val_str(value, buff, &length);
- *(const char **)save = str;
+ // In some cases, str can point to buff in the stack.
+ // This can cause invalid memory access after validation is finished.
+ // To avoid this kind case, let's alway duplicate the str if str is not
+ // nullptr
+ *(const char **)save = (str == nullptr) ? nullptr : my_strdup(str, MYF(0));
if (str == nullptr) {
return HA_EXIT_SUCCESS;
@@ -13993,13 +14291,17 @@ rocksdb_validate_update_cf_options(THD * /* unused */,
my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str);
return HA_EXIT_FAILURE;
}
+ // Loop through option_map and create missing column families
+ for (Rdb_cf_options::Name_to_config_t::iterator it = option_map.begin();
+ it != option_map.end(); ++it) {
+ cf_manager.get_or_create_cf(rdb, it->first);
+ }
return HA_EXIT_SUCCESS;
}
-static void
-rocksdb_set_update_cf_options(THD *const /* unused */,
- struct st_mysql_sys_var *const /* unused */,
- void *const var_ptr, const void *const save) {
+static void rocksdb_set_update_cf_options(
+ THD *const /* unused */, struct st_mysql_sys_var *const /* unused */,
+ void *const var_ptr, const void *const save) {
const char *const val = *static_cast<const char *const *>(save);
RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
@@ -14017,7 +14319,7 @@ rocksdb_set_update_cf_options(THD *const /* unused */,
// Reset the pointers regardless of how much success we had with updating
// the CF options. This will results in consistent behavior and avoids
// dealing with cases when only a subset of CF-s was successfully updated.
- *reinterpret_cast<char **>(var_ptr) = my_strdup(val, MYF(0));
+ *reinterpret_cast<const char **>(var_ptr) = val;
// Do the real work of applying the changes.
Rdb_cf_options::Name_to_config_t option_map;
@@ -14045,9 +14347,10 @@ rocksdb_set_update_cf_options(THD *const /* unused */,
if (s != rocksdb::Status::OK()) {
// NO_LINT_DEBUG
- sql_print_warning("MyRocks: failed to convert the options for column "
- "family '%s' to a map. %s", cf_name.c_str(),
- s.ToString().c_str());
+ sql_print_warning(
+ "MyRocks: failed to convert the options for column "
+ "family '%s' to a map. %s",
+ cf_name.c_str(), s.ToString().c_str());
} else {
DBUG_ASSERT(rdb != nullptr);
@@ -14056,14 +14359,16 @@ rocksdb_set_update_cf_options(THD *const /* unused */,
if (s != rocksdb::Status::OK()) {
// NO_LINT_DEBUG
- sql_print_warning("MyRocks: failed to apply the options for column "
- "family '%s'. %s", cf_name.c_str(),
- s.ToString().c_str());
+ sql_print_warning(
+ "MyRocks: failed to apply the options for column "
+ "family '%s'. %s",
+ cf_name.c_str(), s.ToString().c_str());
} else {
// NO_LINT_DEBUG
- sql_print_information("MyRocks: options for column family '%s' "
- "have been successfully updated.",
- cf_name.c_str());
+ sql_print_information(
+ "MyRocks: options for column family '%s' "
+ "have been successfully updated.",
+ cf_name.c_str());
// Make sure that data is internally consistent as well and update
// the CF options. This is necessary also to make sure that the CF
@@ -14126,18 +14431,39 @@ void ha_rocksdb::rpl_after_update_rows() {
DBUG_VOID_RETURN;
}
+#if 0
+bool ha_rocksdb::is_read_free_rpl_table() const {
+ return table->s && m_tbl_def->m_is_read_free_rpl_table;
+}
+#endif
+
/**
@brief
- Read Free Replication can be used or not. Returning False means
- Read Free Replication can be used. Read Free Replication can be used
- on UPDATE or DELETE row events, and table must have user defined
- primary key.
+ Read Free Replication can be used or not. Returning true means
+ Read Free Replication can be used.
*/
-bool ha_rocksdb::use_read_free_rpl() {
+bool ha_rocksdb::use_read_free_rpl() const {
DBUG_ENTER_FUNC();
- DBUG_RETURN((m_in_rpl_delete_rows || m_in_rpl_update_rows) &&
- !has_hidden_pk(table) && m_use_read_free_rpl);
+  if (!ha_thd()->rli_slave || table->triggers /* || !is_read_free_rpl_table() */) {
+ DBUG_RETURN(false);
+ }
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+ switch (rocksdb_read_free_rpl) {
+ case read_free_rpl_type::OFF:
+ DBUG_RETURN(false);
+ case read_free_rpl_type::PK_ONLY:
+ DBUG_RETURN(!has_hidden_pk(table) && table->s->keys == 1);
+ case read_free_rpl_type::PK_SK:
+ DBUG_RETURN(!has_hidden_pk(table));
+ }
+#else
+ DBUG_RETURN(false);
+#endif
+
+ DBUG_ASSERT(false);
+ DBUG_RETURN(false);
}
#endif // MARIAROCKS_NOT_YET
@@ -14176,7 +14502,7 @@ void sql_print_verbose_info(const char *format, ...)
}
}
-} // namespace myrocks
+} // namespace myrocks
/**
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index a48d745fcf2..437c8667994 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -50,7 +50,9 @@
#include "rocksdb/utilities/write_batch_with_index.h"
/* MyRocks header files */
+#include "./rdb_buff.h"
#include "./rdb_comparator.h"
+#include "./rdb_global.h"
#include "./rdb_index_merge.h"
#include "./rdb_io_watchdog.h"
#include "./rdb_perf_context.h"
@@ -58,6 +60,8 @@
#include "./rdb_utils.h"
#include "./ut0counter.h"
+#include "rdb_mariadb_server_port.h"
+
class Field_blob;
class Field_varstring;
/**
@@ -69,181 +73,13 @@ class Field_varstring;
namespace myrocks {
-/*
- * class for exporting transaction information for
- * information_schema.rocksdb_trx
- */
-struct Rdb_trx_info {
- std::string name;
- ulonglong trx_id;
- ulonglong write_count;
- ulonglong lock_count;
- int timeout_sec;
- std::string state;
- std::string waiting_key;
- ulonglong waiting_cf_id;
- int is_replication;
- int skip_trx_api;
- int read_only;
- int deadlock_detect;
- int num_ongoing_bulk_load;
- ulong thread_id;
- std::string query_str;
-};
-
-std::vector<Rdb_trx_info> rdb_get_all_trx_info();
-
-/*
- * class for exporting deadlock transaction information for
- * information_schema.rocksdb_deadlock
- */
-struct Rdb_deadlock_info {
- struct Rdb_dl_trx_info {
- ulonglong trx_id;
- std::string cf_name;
- std::string waiting_key;
- bool exclusive_lock;
- std::string index_name;
- std::string table_name;
- };
- std::vector <Rdb_dl_trx_info> path;
- int64_t deadlock_time;
- ulonglong victim_trx_id;
-};
-
-std::vector<Rdb_deadlock_info> rdb_get_deadlock_info();
-
-/*
- This is
- - the name of the default Column Family (the CF which stores indexes which
- didn't explicitly specify which CF they are in)
- - the name used to set the default column family parameter for per-cf
- arguments.
-*/
-extern const std::string DEFAULT_CF_NAME;
-
-/*
- This is the name of the Column Family used for storing the data dictionary.
-*/
-extern const std::string DEFAULT_SYSTEM_CF_NAME;
-
-/*
- This is the name of the hidden primary key for tables with no pk.
-*/
-const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID";
-
-/*
- Column family name which means "put this index into its own column family".
- DEPRECATED!!!
-*/
-extern const std::string PER_INDEX_CF_NAME;
-
-/*
- Name for the background thread.
-*/
-const char *const BG_THREAD_NAME = "myrocks-bg";
-
-/*
- Name for the drop index thread.
-*/
-const char *const INDEX_THREAD_NAME = "myrocks-index";
-
-/*
- Name for the manual compaction thread.
-*/
-const char *const MANUAL_COMPACTION_THREAD_NAME = "myrocks-mc";
-
-/*
- Separator between partition name and the qualifier. Sample usage:
-
- - p0_cfname=foo
- - p3_tts_col=bar
-*/
-const char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_';
-
-/*
- Separator between qualifier name and value. Sample usage:
-
- - p0_cfname=foo
- - p3_tts_col=bar
-*/
-const char RDB_QUALIFIER_VALUE_SEP = '=';
-
-/*
- Separator between multiple qualifier assignments. Sample usage:
-
- - p0_cfname=foo;p1_cfname=bar;p2_cfname=baz
-*/
-const char RDB_QUALIFIER_SEP = ';';
-
-/*
- Qualifier name for a custom per partition column family.
-*/
-const char *const RDB_CF_NAME_QUALIFIER = "cfname";
-
-/*
- Qualifier name for a custom per partition ttl duration.
-*/
-const char *const RDB_TTL_DURATION_QUALIFIER = "ttl_duration";
-
-/*
- Qualifier name for a custom per partition ttl duration.
-*/
-const char *const RDB_TTL_COL_QUALIFIER = "ttl_col";
-
-/*
- Default, minimal valid, and maximum valid sampling rate values when collecting
- statistics about table.
-*/
-#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10
-#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1
-#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100
-
-/*
- Default and maximum values for rocksdb-compaction-sequential-deletes and
- rocksdb-compaction-sequential-deletes-window to add basic boundary checking.
-*/
-#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0
-#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000
-
-#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0
-#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000
-
-/*
- Default and maximum values for various compaction and flushing related
- options. Numbers are based on the hardware we currently use and our internal
- benchmarks which indicate that parallelization helps with the speed of
- compactions.
-
- Ideally of course we'll use heuristic technique to determine the number of
- CPU-s and derive the values from there. This however has its own set of
- problems and we'll choose simplicity for now.
-*/
-#define MAX_BACKGROUND_JOBS 64
-
-#define DEFAULT_SUBCOMPACTIONS 1
-#define MAX_SUBCOMPACTIONS 64
-
-/*
- Default value for rocksdb_sst_mgr_rate_bytes_per_sec = 0 (disabled).
-*/
-#define DEFAULT_SST_MGR_RATE_BYTES_PER_SEC 0
-
-/*
- Defines the field sizes for serializing XID object to a string representation.
- string byte format: [field_size: field_value, ...]
- [
- 8: XID.formatID,
- 1: XID.gtrid_length,
- 1: XID.bqual_length,
- XID.gtrid_length + XID.bqual_length: XID.data
- ]
-*/
-#define RDB_FORMATID_SZ 8
-#define RDB_GTRID_SZ 1
-#define RDB_BQUAL_SZ 1
-#define RDB_XIDHDR_LEN (RDB_FORMATID_SZ + RDB_GTRID_SZ + RDB_BQUAL_SZ)
-
+class Rdb_converter;
+class Rdb_key_def;
+class Rdb_tbl_def;
+class Rdb_transaction;
+class Rdb_transaction_impl;
+class Rdb_writebatch_impl;
+class Rdb_field_encoder;
/* collations, used in MariaRocks */
enum collations_used {
COLLATION_UTF8MB4_BIN = 46,
@@ -255,74 +91,13 @@ enum collations_used {
COLLATION_UTF8_BIN = 83
};
-/*
- To fix an unhandled exception we specify the upper bound as LONGLONGMAX
- instead of ULONGLONGMAX because the latter is -1 and causes an exception when
- cast to jlong (signed) of JNI
-
- The reason behind the cast issue is the lack of unsigned int support in Java.
-*/
-#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast<uint64_t>(LONGLONG_MAX)
-
-/*
- Hidden PK column (for tables with no primary key) is a longlong (aka 8 bytes).
- static_assert() in code will validate this assumption.
-*/
-#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong)
-
-/*
- Bytes used to store TTL, in the beginning of all records for tables with TTL
- enabled.
-*/
-#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
-
-#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong)
-
-/*
- Maximum index prefix length in bytes.
-*/
-#define MAX_INDEX_COL_LEN_LARGE 3072
-#define MAX_INDEX_COL_LEN_SMALL 767
-
-/*
- MyRocks specific error codes. NB! Please make sure that you will update
- HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in
- rdb_error_messages to include any new error messages.
-*/
-#define HA_ERR_ROCKSDB_FIRST (HA_ERR_LAST + 1)
-#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0)
-#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \
- (HA_ERR_ROCKSDB_FIRST + 1)
-#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \
- (HA_ERR_ROCKSDB_FIRST + 2)
-#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3)
-#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4)
-#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5)
-#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6)
-#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7)
-#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8)
-#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9)
-/*
- Each error code below maps to a RocksDB status code found in:
- rocksdb/include/rocksdb/status.h
-*/
-#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_LAST + 10)
-#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_LAST + 11)
-#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_LAST + 12)
-#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_LAST + 13)
-#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_LAST + 14)
-#define HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_LAST + 15)
-#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_LAST + 16)
-#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_LAST + 17)
-#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_LAST + 18)
-#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_LAST + 19)
-#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_LAST + 20)
-#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_LAST + 21)
-#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_LAST + 22)
-#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_LAST + 23)
-#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_LAST + 24)
-#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_LAST + 25)
-#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+extern char *rocksdb_read_free_rpl_tables;
+#if defined(HAVE_PSI_INTERFACE)
+extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
+#endif
+extern Regex_list_handler rdb_read_free_regex_handler;
+#endif
/**
@brief
@@ -338,7 +113,7 @@ struct Rdb_table_handler {
atomic_stat<int> m_lock_wait_timeout_counter;
atomic_stat<int> m_deadlock_counter;
- my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock
+ my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock
/* Stores cumulative table statistics */
my_io_perf_atomic_t m_io_perf_read;
@@ -352,138 +127,19 @@ struct Rdb_table_handler {
uint64_t m_mtcache_last_update;
};
-class Rdb_key_def;
-class Rdb_tbl_def;
-class Rdb_transaction;
-class Rdb_transaction_impl;
-class Rdb_writebatch_impl;
-class Rdb_field_encoder;
-
-const char *const rocksdb_hton_name = "ROCKSDB";
-
-typedef struct _gl_index_id_s {
- uint32_t cf_id;
- uint32_t index_id;
- bool operator==(const struct _gl_index_id_s &other) const {
- return cf_id == other.cf_id && index_id == other.index_id;
- }
- bool operator!=(const struct _gl_index_id_s &other) const {
- return cf_id != other.cf_id || index_id != other.index_id;
- }
- bool operator<(const struct _gl_index_id_s &other) const {
- return cf_id < other.cf_id ||
- (cf_id == other.cf_id && index_id < other.index_id);
- }
- bool operator<=(const struct _gl_index_id_s &other) const {
- return cf_id < other.cf_id ||
- (cf_id == other.cf_id && index_id <= other.index_id);
- }
- bool operator>(const struct _gl_index_id_s &other) const {
- return cf_id > other.cf_id ||
- (cf_id == other.cf_id && index_id > other.index_id);
- }
- bool operator>=(const struct _gl_index_id_s &other) const {
- return cf_id > other.cf_id ||
- (cf_id == other.cf_id && index_id >= other.index_id);
- }
-} GL_INDEX_ID;
-
-enum operation_type : int {
- ROWS_DELETED = 0,
- ROWS_INSERTED,
- ROWS_READ,
- ROWS_UPDATED,
- ROWS_DELETED_BLIND,
- ROWS_EXPIRED,
- ROWS_FILTERED,
- ROWS_HIDDEN_NO_SNAPSHOT,
- ROWS_MAX
-};
-
-enum query_type : int { QUERIES_POINT = 0, QUERIES_RANGE, QUERIES_MAX };
-
-#if defined(HAVE_SCHED_GETCPU)
-#define RDB_INDEXER get_sched_indexer_t
-#else
-#define RDB_INDEXER thread_id_indexer_t
-#endif
-
-/* Global statistics struct used inside MyRocks */
-struct st_global_stats {
- ib_counter_t<ulonglong, 64, RDB_INDEXER> rows[ROWS_MAX];
-
- // system_rows_ stats are only for system
- // tables. They are not counted in rows_* stats.
- ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX];
-
- ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX];
-
- ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups;
-};
-
-/* Struct used for exporting status to MySQL */
-struct st_export_stats {
- ulonglong rows_deleted;
- ulonglong rows_inserted;
- ulonglong rows_read;
- ulonglong rows_updated;
- ulonglong rows_deleted_blind;
- ulonglong rows_expired;
- ulonglong rows_filtered;
- ulonglong rows_hidden_no_snapshot;
-
- ulonglong system_rows_deleted;
- ulonglong system_rows_inserted;
- ulonglong system_rows_read;
- ulonglong system_rows_updated;
-
- ulonglong queries_point;
- ulonglong queries_range;
-
- ulonglong covered_secondary_key_lookups;
-};
-
-/* Struct used for exporting RocksDB memory status */
-struct st_memory_stats {
- ulonglong memtable_total;
- ulonglong memtable_unflushed;
-};
-
-/* Struct used for exporting RocksDB IO stalls stats */
-struct st_io_stall_stats {
- ulonglong level0_slowdown;
- ulonglong level0_slowdown_with_compaction;
- ulonglong level0_numfiles;
- ulonglong level0_numfiles_with_compaction;
- ulonglong stop_for_pending_compaction_bytes;
- ulonglong slowdown_for_pending_compaction_bytes;
- ulonglong memtable_compaction;
- ulonglong memtable_slowdown;
- ulonglong total_stop;
- ulonglong total_slowdown;
-
- st_io_stall_stats()
- : level0_slowdown(0), level0_slowdown_with_compaction(0),
- level0_numfiles(0), level0_numfiles_with_compaction(0),
- stop_for_pending_compaction_bytes(0),
- slowdown_for_pending_compaction_bytes(0), memtable_compaction(0),
- memtable_slowdown(0), total_stop(0), total_slowdown(0) {}
-};
-
-} // namespace myrocks
-
-#include "./rdb_buff.h"
+} // namespace myrocks
/* Provide hash function for GL_INDEX_ID so we can include it in sets */
namespace std {
-template <> struct hash<myrocks::GL_INDEX_ID> {
+template <>
+struct hash<myrocks::GL_INDEX_ID> {
std::size_t operator()(const myrocks::GL_INDEX_ID &gl_index_id) const {
const uint64_t val =
((uint64_t)gl_index_id.cf_id << 32 | (uint64_t)gl_index_id.index_id);
return std::hash<uint64_t>()(val);
}
};
-} // namespace std
+} // namespace std
namespace myrocks {
@@ -493,9 +149,9 @@ namespace myrocks {
*/
class ha_rocksdb : public my_core::handler {
- my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock
+ my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock
- Rdb_table_handler *m_table_handler; ///< Open table handler
+ Rdb_table_handler *m_table_handler; ///< Open table handler
/* Iterator used for range scans and for full table/index scans */
rocksdb::Iterator *m_scan_it;
@@ -536,11 +192,6 @@ class ha_rocksdb : public my_core::handler {
*/
mutable bool m_pk_can_be_decoded;
- /*
- TRUE <=> Some fields in the PK may require unpack_info.
- */
- bool m_maybe_unpack_info;
-
uchar *m_pk_tuple; /* Buffer for storing PK in KeyTupleFormat */
uchar *m_pk_packed_tuple; /* Buffer for storing PK in StorageFormat */
// ^^ todo: change it to 'char*'? TODO: ^ can we join this with last_rowkey?
@@ -584,10 +235,13 @@ class ha_rocksdb : public my_core::handler {
*/
uchar *m_pack_buffer;
+ /* class to convert between Mysql format and RocksDB format*/
+ std::shared_ptr<Rdb_converter> m_converter;
+
/*
Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
*/
- char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD];
+ char *m_ttl_bytes;
/*
The TTL timestamp value can change if the explicit TTL column is
updated. If we detect this when updating the PK, we indicate it here so
@@ -598,9 +252,6 @@ class ha_rocksdb : public my_core::handler {
/* rowkey of the last record we've read, in StorageFormat. */
String m_last_rowkey;
- /* Buffer used by convert_record_to_storage_format() */
- String m_storage_record;
-
/*
Last retrieved record, in table->record[0] data format.
@@ -620,15 +271,28 @@ class ha_rocksdb : public my_core::handler {
/* TRUE means we are accessing the first row after a snapshot was created */
bool m_rnd_scan_is_new_snapshot;
- /* TRUE means the replication slave will use Read Free Replication */
- bool m_use_read_free_rpl;
-
/*
TRUE means we should skip unique key checks for this table if the
replication lag gets too large
*/
bool m_skip_unique_check;
+ /*
+ TRUE means INSERT ON DUPLICATE KEY UPDATE. In such case we can optimize by
+ remember the failed attempt (if there is one that violates uniqueness check)
+ in write_row and in the following index_read to skip the lock check and read
+ entirely
+ */
+ bool m_insert_with_update;
+
+ /* TRUE if last time the insertion failed due to duplicated PK */
+ bool m_dup_pk_found;
+
+#ifndef DBUG_OFF
+ /* Last retreived record for sanity checking */
+ String m_dup_pk_retrieved_record;
+#endif
+
/**
@brief
This is a bitmap of indexes (i.e. a set) whose keys (in future, values) may
@@ -645,7 +309,7 @@ class ha_rocksdb : public my_core::handler {
/*
MySQL index number for duplicate key error
*/
- int m_dupp_errkey;
+ uint m_dupp_errkey;
int create_key_defs(const TABLE *const table_arg,
Rdb_tbl_def *const tbl_def_arg,
@@ -678,11 +342,10 @@ class ha_rocksdb : public my_core::handler {
MY_ATTRIBUTE((__nonnull__));
void release_scan_iterator(void);
- rocksdb::Status
- get_for_update(Rdb_transaction *const tx,
- rocksdb::ColumnFamilyHandle *const column_family,
- const rocksdb::Slice &key,
- rocksdb::PinnableSlice *value) const;
+ rocksdb::Status get_for_update(
+ Rdb_transaction *const tx,
+ rocksdb::ColumnFamilyHandle *const column_family,
+ const rocksdb::Slice &key, rocksdb::PinnableSlice *value) const;
int get_row_by_rowid(uchar *const buf, const char *const rowid,
const uint rowid_size, const bool skip_lookup = false,
@@ -707,7 +370,7 @@ class ha_rocksdb : public my_core::handler {
int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__));
int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- bool can_use_single_delete(const uint &index) const
+ bool can_use_single_delete(const uint index) const
MY_ATTRIBUTE((__warn_unused_result__));
bool is_blind_delete_enabled();
bool skip_unique_check() const MY_ATTRIBUTE((__warn_unused_result__));
@@ -725,53 +388,12 @@ class ha_rocksdb : public my_core::handler {
void set_last_rowkey(const uchar *const old_data);
/*
- Array of table->s->fields elements telling how to store fields in the
- record.
- */
- Rdb_field_encoder *m_encoder_arr;
-
- /* Describes instructions on how to decode the field */
- class READ_FIELD {
- public:
- /* Points to Rdb_field_encoder describing the field */
- Rdb_field_encoder *m_field_enc;
- /* if true, decode the field, otherwise skip it */
- bool m_decode;
- /* Skip this many bytes before reading (or skipping) this field */
- int m_skip;
- };
-
- /*
- This tells which table fields should be decoded (or skipped) when
- decoding table row from (pk, encoded_row) pair. (Secondary keys are
- just always decoded in full currently)
- */
- std::vector<READ_FIELD> m_decoders_vect;
-
- /*
- This tells if any field which is part of the key needs to be unpacked and
- decoded.
- */
- bool m_key_requested = false;
-
- /* Setup field_decoders based on type of scan and table->read_set */
- void setup_read_decoders();
-
- /*
For the active index, indicates which columns must be covered for the
current lookup to be covered. If the bitmap field is null, that means this
index does not cover the current lookup for any record.
*/
MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0};
- /*
- Number of bytes in on-disk (storage) record format that are used for
- storing SQL NULL flags.
- */
- uint m_null_bytes_in_rec;
-
- void get_storage_type(Rdb_field_encoder *const encoder, const uint &kp);
- void setup_field_converters();
int alloc_key_buffers(const TABLE *const table_arg,
const Rdb_tbl_def *const tbl_def_arg,
bool alloc_alter_buffers = false)
@@ -779,7 +401,7 @@ class ha_rocksdb : public my_core::handler {
void free_key_buffers();
// the buffer size should be at least 2*Rdb_key_def::INDEX_NUMBER_SIZE
- rocksdb::Range get_range(const int &i, uchar buf[]) const;
+ rocksdb::Range get_range(const int i, uchar buf[]) const;
/*
Perf timers for data reads
@@ -787,17 +409,11 @@ class ha_rocksdb : public my_core::handler {
Rdb_io_perf m_io_perf;
/*
- A counter of how many row checksums were checked for this table. Note that
- this does not include checksums for secondary index entries.
- */
- my_core::ha_rows m_row_checksums_checked;
-
- /*
Update stats
*/
void update_stats(void);
-public:
+ public:
/*
The following two are currently only used for getting the range bounds
from QUICK_SELECT_DESC.
@@ -816,19 +432,19 @@ public:
*/
bool m_store_row_debug_checksums;
- /* Same as above but for verifying checksums when reading */
- bool m_verify_row_debug_checksums;
int m_checksums_pct;
ha_rocksdb(my_core::handlerton *const hton,
my_core::TABLE_SHARE *const table_arg);
- ~ha_rocksdb() {
+ virtual ~ha_rocksdb() override {
int err MY_ATTRIBUTE((__unused__));
err = finalize_bulk_load(false);
if (err != 0) {
- sql_print_error("RocksDB: Error %d finalizing bulk load while closing "
- "handler.",
- err);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Error %d finalizing bulk load while closing "
+ "handler.",
+ err);
}
}
@@ -898,21 +514,6 @@ public:
int rename_table(const char *const from, const char *const to) override
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- int convert_blob_from_storage_format(my_core::Field_blob *const blob,
- Rdb_string_reader *const reader,
- bool decode)
- MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
-
- int convert_varchar_from_storage_format(
- my_core::Field_varstring *const field_var,
- Rdb_string_reader *const reader, bool decode)
- MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
-
- int convert_field_from_storage_format(my_core::Field *const field,
- Rdb_string_reader *const reader,
- bool decode, uint len)
- MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
-
int convert_record_from_storage_format(const rocksdb::Slice *const key,
const rocksdb::Slice *const value,
uchar *const buf)
@@ -925,10 +526,9 @@ public:
static const std::vector<std::string> parse_into_tokens(const std::string &s,
const char delim);
- static const std::string generate_cf_name(const uint index,
- const TABLE *const table_arg,
- const Rdb_tbl_def *const tbl_def_arg,
- bool *per_part_match_found);
+ static const std::string generate_cf_name(
+ const uint index, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found);
static const char *get_key_name(const uint index,
const TABLE *const table_arg,
@@ -1067,7 +667,7 @@ public:
/*
Default implementation from cancel_pushed_idx_cond() suits us
*/
-private:
+ private:
struct key_def_cf_info {
rocksdb::ColumnFamilyHandle *cf_handle;
bool is_reverse_cf;
@@ -1087,16 +687,6 @@ private:
longlong hidden_pk_id;
bool skip_unique_check;
-
- // In certain cases, TTL is enabled on a table, as well as an explicit TTL
- // column. The TTL column can be part of either the key or the value part
- // of the record. If it is part of the key, we store the offset here.
- //
- // Later on, we use this offset to store the TTL in the value part of the
- // record, which we can then access in the compaction filter.
- //
- // Set to UINT_MAX by default to indicate that the TTL is not in key.
- uint ttl_pk_offset = UINT_MAX;
};
/*
@@ -1129,23 +719,24 @@ private:
std::array<struct key_def_cf_info, MAX_INDEXES + 1> *const cfs)
const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- int create_key_def(const TABLE *const table_arg, const uint &i,
+ int create_key_def(const TABLE *const table_arg, const uint i,
const Rdb_tbl_def *const tbl_def_arg,
std::shared_ptr<Rdb_key_def> *const new_key_def,
- const struct key_def_cf_info &cf_info) const
+ const struct key_def_cf_info &cf_info, uint64 ttl_duration,
+ const std::string &ttl_column) const
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int create_inplace_key_defs(
const TABLE *const table_arg, Rdb_tbl_def *vtbl_def_arg,
const TABLE *const old_table_arg,
const Rdb_tbl_def *const old_tbl_def_arg,
- const std::array<key_def_cf_info, MAX_INDEXES + 1> &cfs) const
+ const std::array<key_def_cf_info, MAX_INDEXES + 1> &cf,
+ uint64 ttl_duration, const std::string &ttl_column) const
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- std::unordered_map<std::string, uint>
- get_old_key_positions(const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg,
- const TABLE *old_table_arg,
- const Rdb_tbl_def *old_tbl_def_arg) const
+ std::unordered_map<std::string, uint> get_old_key_positions(
+ const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg,
+ const TABLE *old_table_arg, const Rdb_tbl_def *old_tbl_def_arg) const
MY_ATTRIBUTE((__nonnull__));
int compare_key_parts(const KEY *const old_key,
@@ -1155,24 +746,20 @@ private:
int compare_keys(const KEY *const old_key, const KEY *const new_key) const
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- int convert_record_to_storage_format(const struct update_row_info &row_info,
- rocksdb::Slice *const packed_rec)
- MY_ATTRIBUTE((__nonnull__));
-
bool should_hide_ttl_rec(const Rdb_key_def &kd,
const rocksdb::Slice &ttl_rec_val,
const int64_t curr_ts)
MY_ATTRIBUTE((__warn_unused_result__));
- void rocksdb_skip_expired_records(const Rdb_key_def &kd,
- rocksdb::Iterator *const iter,
- bool seek_backward);
+ int rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward);
int index_first_intern(uchar *buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int index_last_intern(uchar *buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- int find_icp_matching_index_rec(const bool &move_forward, uchar *const buf)
+ int find_icp_matching_index_rec(const bool move_forward, uchar *const buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void calc_updated_indexes();
@@ -1180,20 +767,20 @@ private:
const bool skip_unique_check)
MY_ATTRIBUTE((__warn_unused_result__));
int get_pk_for_update(struct update_row_info *const row_info);
- int check_and_lock_unique_pk(const uint &key_id,
+ int check_and_lock_unique_pk(const uint key_id,
const struct update_row_info &row_info,
- bool *const found, bool *const pk_changed)
+ bool *const found)
MY_ATTRIBUTE((__warn_unused_result__));
- int check_and_lock_sk(const uint &key_id,
+ int check_and_lock_sk(const uint key_id,
const struct update_row_info &row_info,
bool *const found)
MY_ATTRIBUTE((__warn_unused_result__));
int check_uniqueness_and_lock(const struct update_row_info &row_info,
- bool *const pk_changed)
+ bool pk_changed)
MY_ATTRIBUTE((__warn_unused_result__));
bool over_bulk_load_threshold(int *err)
MY_ATTRIBUTE((__warn_unused_result__));
- int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &index,
+ int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &key_def,
const rocksdb::Slice *key,
struct unique_sk_buf_info *sk_info)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -1202,32 +789,36 @@ private:
bool sort)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void update_bytes_written(ulonglong bytes_written);
- int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info,
- const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__));
- int update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
- const struct update_row_info &row_info, const bool bulk_load_sk)
+ int update_write_pk(const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ const bool pk_changed)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ int update_write_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
+ const struct update_row_info &row_info,
+ const bool bulk_load_sk)
MY_ATTRIBUTE((__warn_unused_result__));
- int update_indexes(const struct update_row_info &row_info,
- const bool &pk_changed)
+ int update_write_indexes(const struct update_row_info &row_info,
+ const bool pk_changed)
MY_ATTRIBUTE((__warn_unused_result__));
int read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter,
- const bool &using_full_key,
- const rocksdb::Slice &key_slice,
+ const bool using_full_key, const rocksdb::Slice &key_slice,
const int64_t ttl_filter_ts)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- int read_before_key(const Rdb_key_def &kd, const bool &using_full_key,
+ int read_before_key(const Rdb_key_def &kd, const bool using_full_key,
const rocksdb::Slice &key_slice,
const int64_t ttl_filter_ts)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice,
const int64_t ttl_filter_ts)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- int position_to_correct_key(
- const Rdb_key_def &kd, const enum ha_rkey_function &find_flag,
- const bool &full_key_match, const uchar *const key,
- const key_part_map &keypart_map, const rocksdb::Slice &key_slice,
- bool *const move_forward, const int64_t ttl_filter_ts)
+ int position_to_correct_key(const Rdb_key_def &kd,
+ const enum ha_rkey_function &find_flag,
+ const bool full_key_match, const uchar *const key,
+ const key_part_map &keypart_map,
+ const rocksdb::Slice &key_slice,
+ bool *const move_forward,
+ const int64_t ttl_filter_ts)
MY_ATTRIBUTE((__warn_unused_result__));
int read_row_from_primary_key(uchar *const buf)
@@ -1239,7 +830,7 @@ private:
int calc_eq_cond_len(const Rdb_key_def &kd,
const enum ha_rkey_function &find_flag,
const rocksdb::Slice &slice,
- const int &bytes_changed_by_succ,
+ const int bytes_changed_by_succ,
const key_range *const end_key,
uint *const end_key_packed_size)
MY_ATTRIBUTE((__warn_unused_result__));
@@ -1261,6 +852,10 @@ private:
int calculate_stats_for_table() MY_ATTRIBUTE((__warn_unused_result__));
+ bool should_skip_invalidated_record(const int rc);
+ bool should_recreate_snapshot(const int rc, const bool is_new_snapshot);
+ bool can_assume_tracked(THD *thd);
+
public:
int index_init(uint idx, bool sorted) override
MY_ATTRIBUTE((__warn_unused_result__));
@@ -1315,11 +910,15 @@ private:
ha_rows records_in_range(uint inx, key_range *const min_key,
key_range *const max_key) override
MY_ATTRIBUTE((__warn_unused_result__));
+
+ int delete_table(Rdb_tbl_def *const tbl);
int delete_table(const char *const from) override
MY_ATTRIBUTE((__warn_unused_result__));
int create(const char *const name, TABLE *const form,
HA_CREATE_INFO *const create_info) override
MY_ATTRIBUTE((__warn_unused_result__));
+ int create_table(const std::string &table_name, const TABLE *table_arg,
+ ulonglong auto_increment_value);
bool check_if_incompatible_data(HA_CREATE_INFO *const info,
uint table_changes) override
MY_ATTRIBUTE((__warn_unused_result__));
@@ -1367,26 +966,25 @@ private:
TABLE *const altered_table,
my_core::Alter_inplace_info *const ha_alter_info) override;
- bool
- commit_inplace_alter_table(TABLE *const altered_table,
- my_core::Alter_inplace_info *const ha_alter_info,
- bool commit) override;
+ bool commit_inplace_alter_table(
+ TABLE *const altered_table,
+ my_core::Alter_inplace_info *const ha_alter_info, bool commit) override;
+ void set_skip_unique_check_tables(const char *const whitelist);
#ifdef MARIAROCKS_NOT_YET // MDEV-10976
- void set_use_read_free_rpl(const char *const whitelist);
+ bool is_read_free_rpl_table() const;
#endif
- void set_skip_unique_check_tables(const char *const whitelist);
#ifdef MARIAROCKS_NOT_YET // MDEV-10976
-public:
+ public:
virtual void rpl_before_delete_rows() override;
virtual void rpl_after_delete_rows() override;
virtual void rpl_before_update_rows() override;
virtual void rpl_after_update_rows() override;
- virtual bool use_read_free_rpl() override;
+ virtual bool use_read_free_rpl() const override;
#endif // MARIAROCKS_NOT_YET
-private:
+ private:
/* Flags tracking if we are inside different replication operation */
bool m_in_rpl_delete_rows;
bool m_in_rpl_update_rows;
@@ -1436,16 +1034,21 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx {
std::unordered_set<std::shared_ptr<Rdb_key_def>> added_indexes,
std::unordered_set<GL_INDEX_ID> dropped_index_ids, uint n_added_keys,
uint n_dropped_keys, ulonglong max_auto_incr)
- : my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef),
- m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr),
- m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys),
- m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids),
- m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys),
+ : my_core::inplace_alter_handler_ctx(),
+ m_new_tdef(new_tdef),
+ m_old_key_descr(old_key_descr),
+ m_new_key_descr(new_key_descr),
+ m_old_n_keys(old_n_keys),
+ m_new_n_keys(new_n_keys),
+ m_added_indexes(added_indexes),
+ m_dropped_index_ids(dropped_index_ids),
+ m_n_added_keys(n_added_keys),
+ m_n_dropped_keys(n_dropped_keys),
m_max_auto_incr(max_auto_incr) {}
~Rdb_inplace_alter_ctx() {}
-private:
+ private:
/* Disable Copying */
Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &);
Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &);
@@ -1460,5 +1063,5 @@ extern bool prevent_myrocks_loading;
void sql_print_verbose_info(const char *format, ...);
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h
index 26417328194..03d24957a23 100644
--- a/storage/rocksdb/ha_rocksdb_proto.h
+++ b/storage/rocksdb/ha_rocksdb_proto.h
@@ -78,7 +78,7 @@ Rdb_cf_manager &rdb_get_cf_manager();
const rocksdb::BlockBasedTableOptions &rdb_get_table_options();
bool rdb_is_ttl_enabled();
bool rdb_is_ttl_read_filtering_enabled();
-#ifndef NDEBUG
+#ifndef DBUG_OFF
int rdb_dbug_set_ttl_rec_ts();
int rdb_dbug_set_ttl_snapshot_ts();
int rdb_dbug_set_ttl_read_filter_ts();
@@ -100,4 +100,4 @@ Rdb_ddl_manager *rdb_get_ddl_manager(void)
class Rdb_binlog_manager;
Rdb_binlog_manager *rdb_get_binlog_manager(void)
MY_ATTRIBUTE((__warn_unused_result__));
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/logger.h b/storage/rocksdb/logger.h
index b2820127711..8902bc18893 100644
--- a/storage/rocksdb/logger.h
+++ b/storage/rocksdb/logger.h
@@ -22,7 +22,7 @@
namespace myrocks {
class Rdb_logger : public rocksdb::Logger {
-public:
+ public:
explicit Rdb_logger(const rocksdb::InfoLogLevel log_level =
rocksdb::InfoLogLevel::ERROR_LEVEL)
: m_mysql_log_level(log_level) {}
@@ -77,9 +77,9 @@ public:
m_mysql_log_level = log_level;
}
-private:
+ private:
std::shared_ptr<rocksdb::Logger> m_logger;
rocksdb::InfoLogLevel m_mysql_log_level;
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/myrocks_hotbackup.py b/storage/rocksdb/myrocks_hotbackup.py
index 69c75b7cbfd..fcb3e7088da 100755
--- a/storage/rocksdb/myrocks_hotbackup.py
+++ b/storage/rocksdb/myrocks_hotbackup.py
@@ -45,12 +45,14 @@ class Writer(object):
class StreamWriter(Writer):
stream_cmd= ''
- def __init__(self, stream_option):
+ def __init__(self, stream_option, direct = 0):
super(StreamWriter, self).__init__()
if stream_option == 'tar':
self.stream_cmd= 'tar chf -'
elif stream_option == 'xbstream':
self.stream_cmd= 'xbstream -c'
+ if direct:
+ self.stream_cmd = self.stream_cmd + ' -d'
else:
raise Exception("Only tar or xbstream is supported as streaming option.")
@@ -342,6 +344,13 @@ class MySQLUtil:
row = cur.fetchone()
return row[0]
+ @staticmethod
+ def is_directio_enabled(dbh):
+ sql = "SELECT @@global.rocksdb_use_direct_reads"
+ cur = dbh.cursor()
+ cur.execute(sql)
+ row = cur.fetchone()
+ return row[0]
class BackupRunner:
datadir = None
@@ -363,9 +372,7 @@ class BackupRunner:
try:
signal.signal(signal.SIGINT, signal_handler)
w = None
- if opts.output_stream:
- w = StreamWriter(opts.output_stream)
- else:
+ if not opts.output_stream:
raise Exception("Currently only streaming backup is supported.")
snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round)
@@ -373,6 +380,11 @@ class BackupRunner:
opts.mysql_password,
opts.mysql_port,
opts.mysql_socket)
+ direct = MySQLUtil.is_directio_enabled(dbh)
+ logger.info("Direct I/O: %d", direct)
+
+ w = StreamWriter(opts.output_stream, direct)
+
if not self.datadir:
self.datadir = MySQLUtil.get_datadir(dbh)
logger.info("Set datadir: %s", self.datadir)
diff --git a/storage/rocksdb/mysql-test/rocksdb/combinations b/storage/rocksdb/mysql-test/rocksdb/combinations
index d49de3d38b3..be8080d4b9b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/combinations
+++ b/storage/rocksdb/mysql-test/rocksdb/combinations
@@ -3,4 +3,3 @@ loose-rocksdb_write_policy=write_committed
[write_prepared]
loose-rocksdb_write_policy=write_prepared
-loose-rocksdb_commit_time_batch_for_recovery=on
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
index 8ec97510dbd..29ec94188a2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc
@@ -89,20 +89,32 @@ EOF
# Make sure a snapshot held by another user doesn't block the bulk load
connect (other,localhost,root,,);
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+# Assert that there is a pending snapshot
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
connection default;
+
+# Update CF to smaller value to create multiple SST in ingestion
+eval SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+ '$pk_cf_name={write_buffer_size=8m;target_file_size_base=1m};';
+
set rocksdb_bulk_load=1;
set rocksdb_bulk_load_size=100000;
--disable_query_log
--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
eval LOAD DATA INFILE '$file' INTO TABLE t1;
+# There should be no SST being ingested
+select * from t1;
--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
eval LOAD DATA INFILE '$file' INTO TABLE t2;
+# There should be no SST being ingested
+select * from t2;
--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
eval LOAD DATA INFILE '$file' INTO TABLE t3;
+# There should be no SST being ingested
+select * from t3;
--enable_query_log
set rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
index 5f808087e3e..46aea8f23b5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc
@@ -100,9 +100,10 @@ EOF
# Make sure a snapshot held by another user doesn't block the bulk load
connect (other,localhost,root,,);
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+# Assert that there is a pending snapshot
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
connection default;
set rocksdb_bulk_load=1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc
new file mode 100644
index 00000000000..233635b369e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc
@@ -0,0 +1,298 @@
+CREATE TABLE `link_table` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+ `time` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' ,
+ KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+ `version` , `data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+CREATE TABLE `link_table2` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ `visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+ `time` int(10) unsigned NOT NULL DEFAULT '0' ,
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+ PRIMARY KEY (`link_type` , `id1` , `id2`)
+ COMMENT 'cf_link' ,
+ KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+ `version` , `data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9;
+
+insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125);
+insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125);
+insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125);
+insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125);
+insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125);
+insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125);
+insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125);
+insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125);
+insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125);
+insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125);
+insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125);
+insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125);
+insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125);
+
+insert into link_table2 select * from link_table;
+
+CREATE TABLE `id_table` (
+ `id` bigint(20) NOT NULL DEFAULT '0',
+ `type` int(11) NOT NULL DEFAULT '0',
+ `row_created_time` int(11) NOT NULL DEFAULT '0',
+ `hash_key` varchar(255) NOT NULL DEFAULT '',
+ `is_deleted` tinyint(4) DEFAULT '0',
+ PRIMARY KEY (`id`),
+ KEY `type_id` (`type`,`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED
+KEY_BLOCK_SIZE=8;
+
+insert into id_table values (1, 1, 10, '111', 0);
+insert into id_table values (2, 1, 10, '111', 1);
+insert into id_table values (3, 1, 10, '111', 0);
+insert into id_table values (4, 1, 10, '111', 1);
+insert into id_table values (5, 1, 10, '111', 0);
+insert into id_table values (6, 1, 10, '111', 1);
+insert into id_table values (7, 1, 10, '111', 0);
+insert into id_table values (8, 1, 10, '111', 1);
+insert into id_table values (9, 1, 10, '111', 0);
+insert into id_table values (10, 1, 10, '111', 1);
+
+CREATE TABLE `node_table` (
+ `id` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `type` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `update_time` int(10) unsigned NOT NULL DEFAULT '0',
+ `data` mediumtext COLLATE latin1_bin NOT NULL,
+ PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id',
+ KEY `id` (`id`) COMMENT 'cf_node'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+insert into node_table values (1, 1, 1, 10, 'data');
+
+insert into node_table values (2, 1, 1, 10, 'data');
+
+insert into node_table values (3, 1, 1, 10, 'data');
+
+insert into node_table values (4, 1, 1, 10, 'data');
+
+insert into node_table values (5, 1, 1, 10, 'data');
+
+insert into node_table values (6, 1, 1, 10, 'data');
+
+insert into node_table values (7, 1, 1, 10, 'data');
+
+insert into node_table values (8, 1, 1, 10, 'data');
+
+insert into node_table values (9, 1, 1, 10, 'data');
+
+insert into node_table values (10, 1, 1, 10, 'data');
+
+CREATE TABLE `count_table` (
+ `id` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `count` int(10) unsigned NOT NULL DEFAULT '0',
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+
+insert into count_table values (2, 1, 1, 1, 10, 20);
+
+insert into count_table values (3, 1, 1, 1, 10, 20);
+
+insert into count_table values (4, 1, 1, 1, 10, 20);
+
+insert into count_table values (5, 1, 1, 1, 10, 20);
+
+insert into count_table values (6, 1, 1, 1, 10, 20);
+
+insert into count_table values (7, 1, 1, 1, 10, 20);
+
+insert into count_table values (8, 1, 1, 1, 10, 20);
+
+insert into count_table values (9, 1, 1, 1, 10, 20);
+
+insert into count_table values (10, 1, 1, 1, 10, 20);
+
+CREATE TABLE `link_table5` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+
+
+CREATE TABLE `link_table3` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(4) NOT NULL DEFAULT '0',
+ `data` text COLLATE latin1_bin NOT NULL,
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`)
+ COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+
+insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+
+CREATE TABLE `link_table6` (
+ `id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(4) NOT NULL DEFAULT '0',
+ `data` text COLLATE latin1_bin NOT NULL,
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,
+ `data`(255)) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+
+insert into link_table6 values (1, 1, 2, 2, 1, 1,
+ 'data12_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 3, 2, 1, 2,
+ 'data13_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 4, 2, 1, 2,
+ 'data14_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 5, 2, 1, 1,
+ 'data15_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 1, 2, 1, 1,
+ 'data21_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 2, 2, 1, 1,
+ 'data22_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 3, 2, 1, 1,
+ 'data32_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+
+CREATE TABLE `link_table4` (
+ `id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
+ `raw_key` text COLLATE latin1_bin,
+ `id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+ `link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+ `visibility` tinyint(3) NOT NULL DEFAULT '0',
+ `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+ `time` int(10) unsigned NOT NULL DEFAULT '0',
+ `version` bigint(20) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+ KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`)
+ COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+
+insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1);
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc
new file mode 100644
index 00000000000..79ac367a73b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc
@@ -0,0 +1,1425 @@
+#
+# Test file for WL#1724 (Min/Max Optimization for Queries with Group By Clause).
+# The queries in this file test query execution via QUICK_GROUP_MIN_MAX_SELECT.
+#
+
+#
+# TODO:
+# Add queries with:
+# - C != const
+# - C IS NOT NULL
+# - HAVING clause
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+eval create table t1 (
+ a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' '
+) engine=$engine;
+
+insert into t1 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'),
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4');
+
+create index idx_t1_0 on t1 (a1);
+create index idx_t1_1 on t1 (a1,a2,b,c);
+create index idx_t1_2 on t1 (a1,a2,b);
+analyze table t1;
+
+# t2 is the same as t1, but with some NULLs in the MIN/MAX column, and
+# one more nullable attribute
+
+--disable_warnings
+drop table if exists t2;
+--enable_warnings
+
+eval create table t2 (
+ a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' '
+) engine=$engine;
+insert into t2 select * from t1;
+# add few rows with NULL's in the MIN/MAX column
+insert into t2 (a1, a2, b, c, d) values
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz');
+
+create index idx_t2_0 on t2 (a1);
+create index idx_t2_1 on t2 (a1,a2,b,c);
+create index idx_t2_2 on t2 (a1,a2,b);
+analyze table t2;
+
+# Table t3 is the same as t1, but with smaller column lengths.
+# This allows to test different branches of the cost computation procedure
+# when the number of keys per block is less than the number of keys in the
+# sub-groups formed by predicates over non-group attributes.
+
+--disable_warnings
+drop table if exists t3;
+--enable_warnings
+
+eval create table t3 (
+ a1 char(1), a2 char(1), b char(1), c char(4) not null, d char(3), dummy char(1) default ' '
+) engine=$engine;
+
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+
+create index idx_t3_0 on t3 (a1);
+create index idx_t3_1 on t3 (a1,a2,b,c);
+create index idx_t3_2 on t3 (a1,a2,b);
+analyze table t3;
+
+
+#
+# Queries without a WHERE clause. These queries do not use ranges.
+#
+
+# plans
+explain select a1, min(a2) from t1 group by a1;
+explain select a1, max(a2) from t1 group by a1;
+explain select a1, min(a2), max(a2) from t1 group by a1;
+explain select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+explain select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+--replace_column 7 # 9 #
+explain select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+# Select fields in different order
+explain select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+explain select min(a2) from t1 group by a1;
+explain select a2, min(c), max(c) from t1 group by a1,a2,b;
+
+# queries
+select a1, min(a2) from t1 group by a1;
+select a1, max(a2) from t1 group by a1;
+select a1, min(a2), max(a2) from t1 group by a1;
+select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+# Select fields in different order
+select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+select min(a2) from t1 group by a1;
+select a2, min(c), max(c) from t1 group by a1,a2,b;
+
+#
+# Queries with a where clause
+#
+
+# A) Preds only over the group 'A' attributes
+# plans
+explain select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+explain select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+explain select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+explain select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+--replace_column 9 #
+explain select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+
+# queries
+select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+
+select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+
+# B) Equalities only over the non-group 'B' attributes
+# plans
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+explain select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+explain select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+explain select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+explain select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+explain select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost()
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+# queries
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+
+# these queries test case 2) in TRP_GROUP_MIN_MAX::update_cost()
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+
+
+# IS NULL (makes sense for t2 only)
+# plans
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+
+explain select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+explain select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+explain select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+
+# queries
+select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+
+# C) Range predicates for the MIN/MAX attribute
+# plans
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+
+# queries
+select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+
+select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+
+# analyze the sub-select
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c = t1.c )
+group by a1,a2,b;
+
+# the sub-select is unrelated to MIN/MAX
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c > 'b1' )
+group by a1,a2,b;
+
+
+# A,B,C) Predicates referencing mixed classes of attributes
+# plans
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+explain select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+explain select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+# queries
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+
+#
+# GROUP BY queries without MIN/MAX
+#
+
+# plans
+explain select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+explain select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+explain select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+--replace_column 9 #
+explain select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+--replace_column 9 #
+explain select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+# queries
+select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+
+#
+# DISTINCT queries
+#
+
+# plans
+explain select distinct a1,a2,b from t1;
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+
+--replace_column 9 #
+explain select distinct a1,a2,b from t2;
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+
+# queries
+select distinct a1,a2,b from t1;
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+
+select distinct a1,a2,b from t2;
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+
+# BUG #6303
+select distinct t_00.a1
+from t1 t_00
+where exists ( select * from t2 where a1 = t_00.a1 );
+
+# BUG #8532 - SELECT DISTINCT a, a causes server to crash
+select distinct a1,a1 from t1;
+select distinct a2,a1,a2,a1 from t1;
+select distinct t1.a1,t2.a1 from t1,t2;
+
+
+#
+# DISTINCT queries with GROUP-BY
+#
+
+# plans
+explain select distinct a1,a2,b from t1;
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+explain select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+--replace_column 9 #
+explain select distinct a1,a2,b from t2;
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+--replace_column 9 #
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+--replace_column 9 #
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+# queries
+select distinct a1,a2,b from t1;
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+select distinct a1,a2,b from t2;
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+
+
+#
+# COUNT (DISTINCT cols) queries
+#
+
+explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+explain extended select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+
+select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+
+#
+# Queries with expressions in the select clause
+#
+
+explain select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+explain select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+
+select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+
+
+#
+# Negative examples: queries that should NOT be treated as optimizable by
+# QUICK_GROUP_MIN_MAX_SELECT
+#
+
+# select a non-indexed attribute
+explain select a1,a2,b,d,min(c),max(c) from t1 group by a1,a2,b;
+
+explain select a1,a2,b,d from t1 group by a1,a2,b;
+
+# predicate that references an attribute that is after the MIN/MAX argument
+# in the index
+explain extended select a1,a2,min(b),max(b) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2;
+
+# predicate that references a non-indexed attribute
+explain extended select a1,a2,b,min(c),max(c) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b;
+
+explain extended select a1,a2,b,c from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c;
+
+# non-equality predicate for a non-group select attribute
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b < 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b') group by a1;
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b' and b >= 'a') group by a1;
+explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b;
+
+# non-group field with an equality predicate that references a keypart after the
+# MIN/MAX argument
+explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+
+# disjunction for a non-group select attribute
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1;
+
+# non-range predicate for the MIN/MAX attribute
+explain select a1,a2,b,min(c),max(c) from t2
+where (c > 'a000') and (c <= 'd999') and (c like '_8__') group by a1,a2,b;
+
+# not all attributes are indexed by one index
+explain select a1, a2, b, c, min(d), max(d) from t1 group by a1,a2,b,c;
+
+# other aggregate functions than MIN/MAX
+explain select a1,a2,count(a2) from t1 group by a1,a2,b;
+explain extended select a1,a2,count(a2) from t1 where (a1 > 'a') group by a1,a2,b;
+explain extended select sum(ord(a1)) from t1 where (a1 > 'a') group by a1,a2,b;
+
+# test multi_range_groupby flag
+set optimizer_switch = 'multi_range_groupby=off';
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+set optimizer_switch = 'default';
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+
+
+#
+# Bug #16710: select distinct doesn't return all it should
+#
+
+explain select distinct(a1) from t1 where ord(a2) = 98;
+select distinct(a1) from t1 where ord(a2) = 98;
+
+#
+# BUG#11044: DISTINCT or GROUP BY queries with equality predicates instead of MIN/MAX.
+#
+
+explain select a1 from t1 where a2 = 'b' group by a1;
+select a1 from t1 where a2 = 'b' group by a1;
+
+explain select distinct a1 from t1 where a2 = 'b';
+select distinct a1 from t1 where a2 = 'b';
+
+#
+# Bug #12672: primary key implicitly included in every innodb index
+#
+# Test case moved to group_min_max_innodb
+
+
+#
+# Bug #6142: a problem with the empty innodb table
+#
+# Test case moved to group_min_max_innodb
+
+
+#
+# Bug #9798: group by with rollup
+#
+# Test case moved to group_min_max_innodb
+
+
+#
+# Bug #13293 Wrongly used index results in endless loop.
+#
+# Test case moved to group_min_max_innodb
+
+
+drop table t1,t2,t3;
+
+#
+# Bug #14920 Ordering aggregated result sets with composite primary keys
+# corrupts resultset
+#
+eval create table t1 (c1 int not null,c2 int not null, primary key(c1,c2)) engine=$engine;
+insert into t1 (c1,c2) values
+(10,1),(10,2),(10,3),(20,4),(20,5),(20,6),(30,7),(30,8),(30,9);
+select distinct c1, c2 from t1 order by c2;
+select c1,min(c2) as c2 from t1 group by c1 order by c2;
+select c1,c2 from t1 group by c1,c2 order by c2;
+drop table t1;
+
+#
+# Bug #16203: Analysis for possible min/max optimization erroneously
+# returns impossible range
+#
+
+eval CREATE TABLE t1 (a varchar(5), b int(11), PRIMARY KEY (a,b)) engine=$engine;
+INSERT INTO t1 VALUES ('AA',1), ('AA',2), ('AA',3), ('BB',1), ('AA',4);
+OPTIMIZE TABLE t1;
+
+SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+
+EXPLAIN SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+EXPLAIN SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+
+SELECT DISTINCT a FROM t1 WHERE a='BB';
+SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%';
+SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a;
+
+DROP TABLE t1;
+
+
+#
+# Bug #15102: select distinct returns empty result, select count
+# distinct > 0 (correct)
+#
+
+CREATE TABLE t1 (
+ a int(11) NOT NULL DEFAULT '0',
+ b varchar(16) COLLATE latin1_general_ci NOT NULL DEFAULT '',
+ PRIMARY KEY (a,b)
+ ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;
+
+delimiter |;
+
+CREATE PROCEDURE a(x INT)
+BEGIN
+ DECLARE rnd INT;
+ DECLARE cnt INT;
+
+ WHILE x > 0 DO
+ SET rnd= x % 100;
+ SET cnt = (SELECT COUNT(*) FROM t1 WHERE a = rnd);
+ INSERT INTO t1(a,b) VALUES (rnd, CAST(cnt AS CHAR));
+ SET x= x - 1;
+ END WHILE;
+END|
+
+DELIMITER ;|
+
+CALL a(1000);
+
+SELECT a FROM t1 WHERE a=0;
+SELECT DISTINCT a FROM t1 WHERE a=0;
+SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0;
+
+DROP TABLE t1;
+DROP PROCEDURE a;
+
+#
+# Bug #18068: SELECT DISTINCT
+#
+
+eval CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a)) engine=$engine;
+
+INSERT INTO t1 (a) VALUES
+ (''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
+ ('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
+ ('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
+
+EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
+SELECT DISTINCT a,a FROM t1 ORDER BY a;
+
+DROP TABLE t1;
+
+#
+# Bug #21007: NATURAL JOIN (any JOIN (2 x NATURAL JOIN)) crashes the server
+#
+
+eval CREATE TABLE t1 (id1 INT, id2 INT) engine=$engine;
+eval CREATE TABLE t2 (id2 INT, id3 INT, id5 INT) engine=$engine;
+eval CREATE TABLE t3 (id3 INT, id4 INT) engine=$engine;
+eval CREATE TABLE t4 (id4 INT) engine=$engine;
+eval CREATE TABLE t5 (id5 INT, id6 INT) engine=$engine;
+eval CREATE TABLE t6 (id6 INT) engine=$engine;
+
+INSERT INTO t1 VALUES(1,1);
+INSERT INTO t2 VALUES(1,1,1);
+INSERT INTO t3 VALUES(1,1);
+INSERT INTO t4 VALUES(1);
+INSERT INTO t5 VALUES(1,1);
+INSERT INTO t6 VALUES(1);
+
+# original bug query
+SELECT * FROM
+t1
+ NATURAL JOIN
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+
+# inner join swapped
+SELECT * FROM
+t1
+ NATURAL JOIN
+(((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6) on t3.id4 = t5.id5) JOIN t2
+ ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+
+# one join less, no ON cond
+SELECT * FROM t1 NATURAL JOIN ((t3 join (t5 NATURAL JOIN t6)) JOIN t2);
+
+# wrong error message: 'id2' - ambiguous column
+SELECT * FROM
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5))
+ NATURAL JOIN
+t1;
+SELECT * FROM
+(t2 JOIN ((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6)))
+ NATURAL JOIN
+t1;
+
+DROP TABLE t1,t2,t3,t4,t5,t6;
+
+#
+# Bug#22342: No results returned for query using max and group by
+#
+eval CREATE TABLE t1 (a int, b int, PRIMARY KEY (a,b), KEY b (b)) engine=$engine;
+INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3);
+ANALYZE TABLE t1;
+
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
+eval CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)) engine=$engine;
+INSERT INTO t2 SELECT a,b,b FROM t1;
+ANALYZE TABLE t2;
+explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+
+DROP TABLE t1,t2;
+
+#
+# Bug#24156: Loose index scan not used with CREATE TABLE ...SELECT and similar statements
+#
+
+eval CREATE TABLE t1 (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3), (1,4), (1,5),
+ (2,2), (2,3), (2,1), (3,1), (4,1), (4,2), (4,3), (4,4), (4,5), (4,6);
+ANALYZE TABLE t1;
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+FLUSH STATUS;
+SELECT max(b), a FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+FLUSH STATUS;
+eval CREATE TABLE t2 engine=$engine SELECT max(b), a FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+SELECT * FROM (SELECT max(b), a FROM t1 GROUP BY a) b;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+(SELECT max(b), a FROM t1 GROUP BY a) UNION
+ (SELECT max(b), a FROM t1 GROUP BY a);
+SHOW STATUS LIKE 'handler_read__e%';
+EXPLAIN (SELECT max(b), a FROM t1 GROUP BY a) UNION
+ (SELECT max(b), a FROM t1 GROUP BY a);
+
+EXPLAIN SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+ FROM t1 AS t1_outer;
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE EXISTS
+ (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+ (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) > 12;
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+ a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING
+ a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+EXPLAIN SELECT 1 FROM t1 AS t1_outer1 JOIN t1 AS t1_outer2
+ ON t1_outer1.a = (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2)
+ AND t1_outer1.b = t1_outer2.b;
+EXPLAIN SELECT (SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+ FROM t1 AS t1_outer) x2 FROM t1 AS t1_outer2;
+
+CREATE TABLE t3 LIKE t1;
+FLUSH STATUS;
+INSERT INTO t3 SELECT a,MAX(b) FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+DELETE FROM t3;
+FLUSH STATUS;
+INSERT INTO t3 SELECT 1, (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2)
+ FROM t1 LIMIT 1;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+DELETE FROM t3 WHERE (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) > 10000;
+SHOW STATUS LIKE 'handler_read__e%';
+FLUSH STATUS;
+--error ER_SUBQUERY_NO_1_ROW
+DELETE FROM t3 WHERE (SELECT (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) x
+ FROM t1) > 10000;
+SHOW STATUS LIKE 'handler_read__e%';
+
+DROP TABLE t1,t2,t3;
+
+#
+# Bug#25602: queries with DISTINCT and SQL_BIG_RESULT hint
+# for which loose scan optimization is applied
+#
+
+eval CREATE TABLE t1 (a int, INDEX idx(a)) engine=$engine;
+INSERT INTO t1 VALUES
+ (4), (2), (1), (2), (4), (2), (1), (4),
+ (4), (2), (1), (2), (2), (4), (1), (4);
+ANALYZE TABLE t1;
+
+EXPLAIN SELECT DISTINCT(a) FROM t1;
+SELECT DISTINCT(a) FROM t1;
+EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+
+DROP TABLE t1;
+
+#
+# Bug #32268: Indexed queries give bogus MIN and MAX results
+#
+
+eval CREATE TABLE t1 (a INT, b INT) engine=$engine;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3);
+INSERT INTO t1 SELECT a + 1, b FROM t1;
+INSERT INTO t1 SELECT a + 2, b FROM t1;
+ANALYZE TABLE t1;
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+
+CREATE INDEX break_it ON t1 (a, b);
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+
+EXPLAIN
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+
+DROP TABLE t1;
+
+#
+# Bug#38195: Incorrect handling of aggregate functions when loose index scan is
+# used causes server crash.
+#
+create table t1 (a int, b int, primary key (a,b), key `index` (a,b)) engine=MyISAM;
+insert into t1 (a,b) values
+(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6),
+ (0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13),
+(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),
+ (1,7),(1,8),(1,9),(1,10),(1,11),(1,12),(1,13),
+(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),
+ (2,7),(2,8),(2,9),(2,10),(2,11),(2,12),(2,13),
+(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6),
+ (3,7),(3,8),(3,9),(3,10),(3,11),(3,12),(3,13);
+insert into t1 (a,b) select a, max(b)+1 from t1 where a = 0 group by a;
+ANALYZE TABLE t1;
+select * from t1;
+explain extended select sql_buffer_result a, max(b)+1 from t1 where a = 0 group by a;
+drop table t1;
+
+
+#
+# Bug #41610: key_infix_len can be overwritten causing some group by queries
+# to return no rows
+#
+
+eval CREATE TABLE t1 (a int, b int, c int, d int,
+ KEY foo (c,d,a,b), KEY bar (c,a,b,d)) engine=$engine;
+
+INSERT INTO t1 VALUES (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 1, 3), (1, 1, 1, 4);
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT a,b,c+1,d FROM t1;
+ANALYZE TABLE t1;
+
+#Should be non-empty
+EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4;
+SELECT DISTINCT c FROM t1 WHERE d=4;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Bug #45386: Wrong query result with MIN function in field list,
+--echo # WHERE and GROUP BY clause
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+
+--echo # test MIN
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+
+--echo # test MAX
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+
+--echo # test 3 ranges and use the middle one
+INSERT INTO t SELECT a, 2 FROM t;
+
+--echo #should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+--echo #should return 1 row
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+
+DROP TABLE t;
+
+--echo #
+--echo # Bug #48472: Loose index scan inappropriately chosen for some WHERE
+--echo # conditions
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+
+SELECT a, MAX(b) FROM t WHERE 0=b+0 GROUP BY a;
+
+DROP TABLE t;
+
+--echo End of 5.0 tests
+
+--echo #
+--echo # Bug #46607: Assertion failed: (cond_type == Item::FUNC_ITEM) results in
+--echo # server crash
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=$engine;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+
+SELECT a, MAX(b) FROM t WHERE b GROUP BY a;
+
+DROP TABLE t;
+
+#
+# BUG#49902 - SELECT returns incorrect results
+#
+eval CREATE TABLE t1(a INT NOT NULL, b INT NOT NULL, KEY (b)) engine=$engine;
+INSERT INTO t1 VALUES(1,1),(2,1);
+ANALYZE TABLE t1;
+SELECT 1 AS c, b FROM t1 WHERE b IN (1,2) GROUP BY c, b;
+SELECT a FROM t1 WHERE b=1;
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#47762: Incorrect result from MIN() when WHERE tests NOT NULL column
+--echo # for NULL
+--echo #
+
+--echo ## Test for NULLs allowed
+eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+--source include/min_null_cond.inc
+INSERT INTO t1 VALUES (NULL), (NULL);
+ANALYZE TABLE t1;
+--source include/min_null_cond.inc
+DROP TABLE t1;
+
+--echo ## Test for NOT NULLs
+eval CREATE TABLE t1 ( a INT NOT NULL PRIMARY KEY) engine=$engine;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+--echo #
+--echo # NULL-safe operator test disabled for non-NULL indexed columns.
+--echo #
+--echo # See bugs
+--echo #
+--echo # - Bug#52173: Reading NULL value from non-NULL index gives
+--echo # wrong result in embedded server
+--echo #
+--echo # - Bug#52174: Sometimes wrong plan when reading a MAX value from
+--echo # non-NULL index
+--echo #
+--let $skip_null_safe_test= 1
+--source include/min_null_cond.inc
+DROP TABLE t1;
+
+--echo #
+--echo # Bug#53859: Valgrind: opt_sum_query(TABLE_LIST*, List<Item>&, Item*) at
+--echo # opt_sum.cc:305
+--echo #
+eval CREATE TABLE t1 ( a INT, KEY (a) ) engine=$engine;
+INSERT INTO t1 VALUES (1), (2), (3);
+
+SELECT MIN( a ) AS min_a
+FROM t1
+WHERE a > 1 AND a IS NULL
+ORDER BY min_a;
+
+DROP TABLE t1;
+
+
+--echo End of 5.1 tests
+
+
+--echo #
+--echo # WL#3220 (Loose index scan for COUNT DISTINCT)
+--echo #
+
+eval CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b)) engine=$engine;
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+ANALYZE TABLE t1;
+eval CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, KEY (a,b,c)) engine=$engine;
+INSERT INTO t2 VALUES (1,1,1,1,1,1), (1,2,1,1,1,1), (1,3,1,1,1,1),
+ (1,4,1,1,1,1);
+INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2;
+INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2;
+ANALYZE TABLE t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+SELECT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+SELECT COUNT(DISTINCT a,b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+SELECT COUNT(DISTINCT b,a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+SELECT COUNT(DISTINCT b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+
+EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+
+EXPLAIN SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+
+EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1;
+SELECT COUNT(DISTINCT a), 12 FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2;
+SELECT COUNT(DISTINCT a, b, c) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+
+EXPLAIN SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+
+EXPLAIN SELECT COUNT(DISTINCT c, a, b) FROM t2
+ WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+SELECT COUNT(DISTINCT c, a, b) FROM t2
+ WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+ GROUP BY b;
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+ GROUP BY b;
+
+EXPLAIN SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+
+EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+ WHERE b = 13 AND c = 42 GROUP BY a;
+SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+ WHERE b = 13 AND c = 42 GROUP BY a;
+
+--echo # This query could have been resolved using loose index scan since
+--echo # the second part of count(..) is defined by a constant predicate
+EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+
+EXPLAIN SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+
+EXPLAIN SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+
+EXPLAIN SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+
+DROP TABLE t1,t2;
+
+--echo # end of WL#3220 tests
+
+--echo #
+--echo # Bug#50539: Wrong result when loose index scan is used for an aggregate
+--echo # function with distinct
+--echo #
+eval CREATE TABLE t1 (
+ f1 int(11) NOT NULL DEFAULT '0',
+ f2 char(1) NOT NULL DEFAULT '',
+ PRIMARY KEY (f1,f2)
+) engine=$engine;
+insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'),
+(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
+ANALYZE TABLE t1;
+
+SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+
+drop table t1;
+--echo # End of test#50539.
+
+--echo #
+--echo # Bug#17217128 - BAD INTERACTION BETWEEN MIN/MAX AND
+--echo # "HAVING SUM(DISTINCT)": WRONG RESULTS.
+--echo #
+
+eval CREATE TABLE t (a INT, b INT, KEY(a,b)) engine=$engine;
+INSERT INTO t VALUES (1,1), (2,2), (3,3), (4,4), (1,0), (3,2), (4,5);
+ANALYZE TABLE t;
+let $DEFAULT_TRACE_MEM_SIZE=1048576; # 1MB
+eval set optimizer_trace_max_mem_size=$DEFAULT_TRACE_MEM_SIZE;
+set @@session.optimizer_trace='enabled=on';
+set end_markers_in_json=on;
+
+ANALYZE TABLE t;
+
+SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+ FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+EXPLAIN SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+ FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+EXPLAIN SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+ FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+EXPLAIN SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+ FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+ FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+SET optimizer_trace_max_mem_size=DEFAULT;
+SET optimizer_trace=DEFAULT;
+SET end_markers_in_json=DEFAULT;
+
+DROP TABLE t;
+
+--echo #
+--echo # Bug#18109609: LOOSE INDEX SCAN IS NOT USED WHEN IT SHOULD
+--echo #
+
+eval CREATE TABLE t1 (
+id INT AUTO_INCREMENT PRIMARY KEY,
+c1 INT,
+c2 INT,
+KEY(c1,c2)) engine=$engine;
+
+INSERT INTO t1(c1,c2) VALUES
+(1, 1), (1,2), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3),
+(4,4), (4,5), (4,6), (4,7), (4,8), (4,9), (4,10), (4,11), (4,12), (4,13),
+(4,14), (4,15), (4,16), (4,17), (4,18), (4,19), (4,20),(5,5);
+ANALYZE TABLE t1;
+
+EXPLAIN SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+FLUSH STATUS;
+SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+SHOW SESSION STATUS LIKE 'Handler_read%';
+
+DROP TABLE t1;
+
+--echo # End of test for Bug#18109609
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc
new file mode 100644
index 00000000000..d9b4b46b25a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc
@@ -0,0 +1,23 @@
+# Common test pattern for options that control direct i/o
+#
+# Required input:
+# $io_option - name and assignment to enable on server command line
+
+--perl
+use Cwd 'abs_path';
+
+open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/data_in_shm.inc") or die;
+my $real_path= abs_path($ENV{'MYSQLTEST_VARDIR'});
+my $in_shm= (index($real_path, "/dev/shm") != -1) ||
+ (index($real_path, "/run/shm") != -1);
+print FILE "let \$DATA_IN_SHM= $in_shm;\n";
+close FILE;
+EOF
+
+--source $MYSQL_TMP_DIR/data_in_shm.inc
+--remove_file $MYSQL_TMP_DIR/data_in_shm.inc
+
+if ($DATA_IN_SHM)
+{
+ --skip DATADIR is in /{dev|run}/shm, possibly due to --mem
+}
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc
index c23717c4fda..34947cb0ecb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc
@@ -52,8 +52,9 @@ let $ID = `SELECT connection_id()`;
send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
connection con2;
-let $wait_condition = SELECT 1 FROM information_schema.processlist
- WHERE id = $ID AND state = "Sending data";
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data";
--source include/wait_condition.inc
eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
UPDATE t0 SET VALUE=VALUE+1 WHERE id=190000;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc
index da80f796750..8140b81a95e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc
@@ -52,8 +52,9 @@ let $ID = `SELECT connection_id()`;
send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
connection con2;
-let $wait_condition = SELECT 1 FROM information_schema.processlist
- WHERE id = $ID AND state = "Sending data";
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data";
--source include/wait_condition.inc
eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
INSERT INTO t0 VALUES(200001,1), (-1,1);
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc
index b77a54e4360..e28f1c90b3a 100644
--- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc
@@ -55,8 +55,9 @@ let $ID = `SELECT connection_id()`;
send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
connection con2;
-let $wait_condition = SELECT 1 FROM information_schema.processlist
- WHERE id = $ID AND state = "Sending data";
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID /* OR srv_id = $ID*/) AND state = "Sending data";
--source include/wait_condition.inc
eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
BEGIN;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc
index 9494146ba5c..13ceca07913 100644
--- a/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc
@@ -55,8 +55,9 @@ let $ID = `SELECT connection_id()`;
send SELECT * FROM t0 WHERE value > 0 FOR UPDATE;
connection con2;
-let $wait_condition = SELECT 1 FROM information_schema.processlist
- WHERE id = $ID AND state = "Sending data";
+let $wait_condition =
+ SELECT 1 FROM information_schema.processlist
+ WHERE (id = $ID/* OR srv_id = $ID*/) AND state = "Sending data";
--source include/wait_condition.inc
eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
BEGIN;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc b/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc
deleted file mode 100644
index 71e713226d7..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/include/rocksdb_concurrent_delete.inc
+++ /dev/null
@@ -1,53 +0,0 @@
-# Usage:
-#
-# let $order = ASC; # or DESC
-# let $comment = "rev:cf2"; # or ""
-# --source suite/rocksdb/include/rocksdb_concurrent_delete.inc
-
-let $first_row = -1; # Error this should never happen
-if ($order == 'ASC')
-{
- let $first_row = 1;
-}
-if ($order == 'DESC')
-{
- let $first_row = 3;
-}
-
-connect (con, localhost, root,,);
-connection default;
-
---disable_warnings
-SET debug_sync='RESET';
-DROP TABLE IF EXISTS t1;
---enable_warnings
-
-eval CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT $comment, a INT);
-INSERT INTO t1 VALUES(1,1), (2,2), (3,3);
-
-# This will cause the SELECT to block after finding the first row, but
-# before locking and reading it.
-connection con;
-SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
-send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE;
-
-# While that connection is waiting, delete the first row (the one con
-# is about to lock and read
-connection default;
-SET debug_sync='now WAIT_FOR parked';
-eval DELETE FROM t1 WHERE pk = $first_row;
-
-# Signal the waiting select to continue
-SET debug_sync='now SIGNAL go';
-
-# Now get the results from the select. The first entry (1,1) (or (3,3) when
-# using reverse ordering) should be missing. Prior to the fix the SELECT
-# would have returned: "1815: Internal error: NotFound:"
-connection con;
-reap;
-
-# Cleanup
-connection default;
-disconnect con;
-set debug_sync='RESET';
-drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc
new file mode 100644
index 00000000000..da16e1c9c3b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc
@@ -0,0 +1,23 @@
+# Common test pattern for options that control direct i/o
+#
+# Required input:
+# $io_option - name and assignment to enable on server command line
+
+--source include/have_direct_io.inc
+
+--echo Checking direct reads
+--let $_mysqld_option=$io_option
+--source include/restart_mysqld_with_option.inc
+
+CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1, 1,'a');
+INSERT INTO t1 (a,b) VALUES (2,'b');
+set global rocksdb_force_flush_memtable_now=1;
+--sorted_result
+SELECT a,b FROM t1;
+DROP TABLE t1;
+
+# cleanup
+--let _$mysqld_option=
+--source include/restart_mysqld.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
index 0617232f1e3..e7883f7e03e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result
@@ -17,7 +17,7 @@ ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE;
set @tmp= @@rocksdb_max_row_locks;
set session rocksdb_max_row_locks=1000;
ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
-ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to max_num_locks limit' from ROCKSDB
+ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to rocksdb_max_row_locks limit' from ROCKSDB
set session rocksdb_bulk_load=1;
ALTER TABLE t1 ADD INDEX kb_copy(b), ALGORITHM=COPY;
set session rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
index f59b841a595..cc47ceff7ca 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result
@@ -176,3 +176,24 @@ INSERT INTO t1 (a) VALUES (1);
UPDATE t1 SET pk = 3;
ALTER TABLE t1 AUTO_INCREMENT 2;
DROP TABLE t1;
+#----------------------------------
+# Issue #902 Debug assert in autoincrement with small field type
+#----------------------------------
+SET auto_increment_increment=100, auto_increment_offset=10;
+CREATE TABLE t1(i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615;
+INSERT INTO t1 VALUES (NULL);
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+i
+ALTER TABLE t1 AUTO_INCREMENT=1;
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+i
+10
+ALTER TABLE t1 AUTO_INCREMENT=18446744073709551615;
+INSERT INTO t1 VALUES (NULL);
+ERROR HY000: Failed to read auto-increment value from storage engine
+SELECT * FROM t1;
+i
+10
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result
index a3fc25cc81b..973d1876fa0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_without_tx_api.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result
@@ -3,6 +3,7 @@ Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key;
set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api;
DROP TABLE IF EXISTS t1,t2;
@@ -17,6 +18,7 @@ SELECT count(*) FROM t1;
count(*)
9000
include/sync_slave_sql_with_master.inc
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SELECT count(*) FROM t1;
count(*)
9000
@@ -71,14 +73,14 @@ count(*)
call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*");
call mtr.add_suppression("Slave: Can't find record in 't1'.*");
include/wait_for_slave_sql_error.inc [errno=1032]
-set @save_rocksdb_read_free_rpl_tables=@@global.rocksdb_read_free_rpl_tables;
-set global rocksdb_read_free_rpl_tables="t.*";
+set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl;
+set global rocksdb_read_free_rpl=PK_SK;
START SLAVE;
include/sync_slave_sql_with_master.inc
SELECT count(*) FROM t1;
count(*)
7000
-set global rocksdb_read_free_rpl_tables=@save_rocksdb_read_free_rpl_tables;
+set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl;
SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key;
SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api;
DROP TABLE t1, t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result
new file mode 100644
index 00000000000..683b672e360
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result
@@ -0,0 +1,87 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key;
+set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api;
+DROP TABLE IF EXISTS t1,t2;
+create table t1 (id int primary key, value int, value2 varchar(200)) engine=rocksdb;
+create table t2 (id int primary key, value int, value2 varchar(200), index(value)) engine=rocksdb;
+SET session rocksdb_blind_delete_primary_key=1;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+1000
+SELECT count(*) FROM t1;
+count(*)
+9000
+include/sync_slave_sql_with_master.inc
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SELECT count(*) FROM t1;
+count(*)
+9000
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+0
+SELECT count(*) FROM t2;
+count(*)
+9000
+SET session rocksdb_master_skip_tx_api=1;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+1000
+SELECT count(*) FROM t1;
+count(*)
+8000
+SELECT count(*) FROM t2;
+count(*)
+8000
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+8000
+SELECT count(*) FROM t2;
+count(*)
+8000
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+DELETE FROM t1 WHERE id BETWEEN 3001 AND 4000;
+DELETE FROM t2 WHERE id BETWEEN 3001 AND 4000;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_deleted_blind';
+variable_value-@c
+0
+SELECT count(*) FROM t1;
+count(*)
+7000
+SELECT count(*) FROM t2;
+count(*)
+7000
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+7000
+SELECT count(*) FROM t2;
+count(*)
+7000
+DELETE FROM t1 WHERE id = 10;
+SELECT count(*) FROM t1;
+count(*)
+7000
+call mtr.add_suppression("Slave SQL.*Could not execute Delete_rows event on table test.t1.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 't1'.*");
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl;
+set global rocksdb_read_free_rpl=PK_SK;
+START SLAVE;
+include/sync_slave_sql_with_master.inc
+SELECT count(*) FROM t1;
+count(*)
+7000
+set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl;
+SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key;
+SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api;
+DROP TABLE t1, t2;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result
index 6ad9867049d..f3c4fdf1040 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result
@@ -20,6 +20,24 @@ id1 id2 link_type visibility data time version
select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
case when variable_value-@c > 0 then 'true' else 'false' end
true
+# MariaDB: we don't have optimizer_force_index_for_range, but we can use EITS
+# to get the query plan we want.
+set @tmp_use_stat_tables= @@use_stat_tables;
+set use_stat_tables='preferably';
+analyze table linktable persistent for all;
+Table Op Msg_type Msg_text
+test.linktable analyze status Engine-independent statistics collected
+test.linktable analyze status OK
+flush tables;
+explain select * from linktable;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE linktable ALL NULL NULL NULL NULL 10000
+# This must use range(id1_type2), key_len=24
+explain
+select id1, id2, link_type, visibility, data, time, version from linktable
+FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE linktable range id1_type2 id1_type2 24 NULL 1000 Using where; Using index
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
id1 id2 link_type visibility data time version
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
index 4f6702b85a7..daf4f5e30ba 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result
@@ -59,4 +59,27 @@ insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
# This must not fail an assert:
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
pk kp1 kp2 col1
-drop table t1,t2,t3,t4;
+#
+# Issue #881: Issue #809 still occurs for reverse scans on forward cfs
+#
+create table t5 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'bf5_1'
+) engine=ROCKSDB;
+insert into t5 select * from t1;
+set global rocksdb_force_flush_memtable_now=1;
+# An index scan starting from the end of the table:
+explain
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t5 index NULL PRIMARY 122 NULL 1
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+id1 id2 id3 id4 id5 value value2
+1000 2000 2000 10000 10000 1000 aaabbbccc
+drop table t1,t2,t3,t4,t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
index 737d3a3befa..3af4585d298 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
@@ -22,19 +22,20 @@ KEY(a)
) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 0
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 1
-connection default;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
set rocksdb_bulk_load=1;
set rocksdb_bulk_load_size=100000;
LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
index b4ef029e5c4..9100f04965e 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
@@ -22,19 +22,20 @@ KEY(a)
) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 0
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 1
-connection default;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
set rocksdb_bulk_load=1;
set rocksdb_bulk_load_size=100000;
LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
index b7d5a9c922f..8aeafeac8cb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -22,19 +22,20 @@ KEY(a)
) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 0
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 1
-connection default;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
set rocksdb_bulk_load=1;
set rocksdb_bulk_load_size=100000;
LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
index 7ccc39f3582..4f7fbee249d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
@@ -22,19 +22,20 @@ KEY(a)
) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 0
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 1
-connection default;
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
+SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS=
+'cf1={write_buffer_size=8m;target_file_size_base=1m};';
set rocksdb_bulk_load=1;
set rocksdb_bulk_load_size=100000;
LOAD DATA INFILE <input_file> INTO TABLE t1;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t2;
+pk a b
LOAD DATA INFILE <input_file> INTO TABLE t3;
+pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
index 40274c337c3..1041e96b802 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
@@ -59,13 +59,10 @@ CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 0
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 1
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
connection default;
set rocksdb_bulk_load=1;
set rocksdb_bulk_load_size=100000;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
index 1aaac8ec268..34b14e9e5de 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result
@@ -59,13 +59,10 @@ CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "rev:cf1")
ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4;
connect other,localhost,root,,;
set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 0
start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-STAT_TYPE VALUE
-DB_NUM_SNAPSHOTS 1
+select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+Has opened snapshots
+1
connection default;
set rocksdb_bulk_load=1;
set rocksdb_bulk_load_size=100000;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result
new file mode 100644
index 00000000000..1f687dfec53
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result
@@ -0,0 +1,693 @@
+CREATE TABLE `link_table` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+CREATE TABLE `link_table2` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`)
+COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9;
+insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125);
+insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125);
+insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125);
+insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125);
+insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125);
+insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125);
+insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125);
+insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125);
+insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125);
+insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125);
+insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125);
+insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125);
+insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125);
+insert into link_table2 select * from link_table;
+CREATE TABLE `id_table` (
+`id` bigint(20) NOT NULL DEFAULT '0',
+`type` int(11) NOT NULL DEFAULT '0',
+`row_created_time` int(11) NOT NULL DEFAULT '0',
+`hash_key` varchar(255) NOT NULL DEFAULT '',
+`is_deleted` tinyint(4) DEFAULT '0',
+PRIMARY KEY (`id`),
+KEY `type_id` (`type`,`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED
+KEY_BLOCK_SIZE=8;
+insert into id_table values (1, 1, 10, '111', 0);
+insert into id_table values (2, 1, 10, '111', 1);
+insert into id_table values (3, 1, 10, '111', 0);
+insert into id_table values (4, 1, 10, '111', 1);
+insert into id_table values (5, 1, 10, '111', 0);
+insert into id_table values (6, 1, 10, '111', 1);
+insert into id_table values (7, 1, 10, '111', 0);
+insert into id_table values (8, 1, 10, '111', 1);
+insert into id_table values (9, 1, 10, '111', 0);
+insert into id_table values (10, 1, 10, '111', 1);
+CREATE TABLE `node_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+`update_time` int(10) unsigned NOT NULL DEFAULT '0',
+`data` mediumtext COLLATE latin1_bin NOT NULL,
+PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id',
+KEY `id` (`id`) COMMENT 'cf_node'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into node_table values (1, 1, 1, 10, 'data');
+insert into node_table values (2, 1, 1, 10, 'data');
+insert into node_table values (3, 1, 1, 10, 'data');
+insert into node_table values (4, 1, 1, 10, 'data');
+insert into node_table values (5, 1, 1, 10, 'data');
+insert into node_table values (6, 1, 1, 10, 'data');
+insert into node_table values (7, 1, 1, 10, 'data');
+insert into node_table values (8, 1, 1, 10, 'data');
+insert into node_table values (9, 1, 1, 10, 'data');
+insert into node_table values (10, 1, 1, 10, 'data');
+CREATE TABLE `count_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`count` int(10) unsigned NOT NULL DEFAULT '0',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into count_table values (2, 1, 1, 1, 10, 20);
+insert into count_table values (3, 1, 1, 1, 10, 20);
+insert into count_table values (4, 1, 1, 1, 10, 20);
+insert into count_table values (5, 1, 1, 1, 10, 20);
+insert into count_table values (6, 1, 1, 1, 10, 20);
+insert into count_table values (7, 1, 1, 1, 10, 20);
+insert into count_table values (8, 1, 1, 1, 10, 20);
+insert into count_table values (9, 1, 1, 1, 10, 20);
+insert into count_table values (10, 1, 1, 1, 10, 20);
+CREATE TABLE `link_table5` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table3` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table6` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,
+`data`(255)) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table6 values (1, 1, 2, 2, 1, 1,
+'data12_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 3, 2, 1, 2,
+'data13_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 4, 2, 1, 2,
+'data14_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 5, 2, 1, 1,
+'data15_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 1, 2, 1, 1,
+'data21_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 2, 2, 1, 1,
+'data22_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 3, 2, 1, 1,
+'data32_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+CREATE TABLE `link_table4` (
+`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
+`raw_key` text COLLATE latin1_bin,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1);
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version
+from link_table WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+# Point query
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1) and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+2 2 1 2 a10 125
+2 3 1 2 a11 125
+2 4 1 2 a11 125
+# Prefix range query
+# Prefix range query with SK
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME ASC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+# Prefix range query with SK with limits
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,10;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,5;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,1;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,10;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,5;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,1;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,10;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,5;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,1;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,0;
+id1 id2 link_type visibility data time version
+# Prefix range query with PK
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type
+1 10 3
+1 9 3
+1 8 3
+1 7 3
+1 6 3
+1 5 3
+1 4 3
+1 3 3
+1 2 3
+1 1 3
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type
+1 1 3
+1 2 3
+1 3 3
+1 4 3
+1 5 3
+1 6 3
+1 7 3
+1 8 3
+1 9 3
+1 10 3
+# Prefix range query with PK + value
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 5 3 3 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 2 3 3 a10 10 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type visibility data time version
+1 1 3 4 a10 10 125
+1 2 3 3 a10 10 125
+1 3 3 4 a11 11 125
+1 4 3 4 a11 11 125
+1 5 3 3 a12 12 125
+1 6 3 4 a12 12 125
+1 7 3 4 a12 12 125
+1 8 3 4 a13 13 125
+1 9 3 4 a14 14 125
+1 10 3 4 a15 15 125
+# Transaction
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+COMMIT;
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 cde 125
+ROLLBACK;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+# Data types
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True;
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL;
+id1
+DROP TABLE count_table;
+DROP TABLE link_table;
+DROP TABLE link_table3;
+DROP TABLE link_table2;
+DROP TABLE id_table;
+DROP TABLE node_table;
+DROP TABLE link_table5;
+DROP TABLE link_table6;
+DROP TABLE link_table4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result
new file mode 100644
index 00000000000..1f687dfec53
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result
@@ -0,0 +1,693 @@
+CREATE TABLE `link_table` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+CREATE TABLE `link_table2` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0' ,
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+`visibility` tinyint(3) NOT NULL DEFAULT '0' ,
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' ,
+`time` int(10) unsigned NOT NULL DEFAULT '0' ,
+`version` bigint(20) unsigned NOT NULL DEFAULT '0' ,
+PRIMARY KEY (`link_type` , `id1` , `id2`)
+COMMENT 'cf_link' ,
+KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` ,
+`version` , `data`) COMMENT 'cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9;
+insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125);
+insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125);
+insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125);
+insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125);
+insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 7, 2, 4, 4, 'a12', 12, 125);
+insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125);
+insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125);
+insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125);
+insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125);
+insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125);
+insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125);
+insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125);
+insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125);
+insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125);
+insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125);
+insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125);
+insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125);
+insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125);
+insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125);
+insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125);
+insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125);
+insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125);
+insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125);
+insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125);
+insert into link_table2 select * from link_table;
+CREATE TABLE `id_table` (
+`id` bigint(20) NOT NULL DEFAULT '0',
+`type` int(11) NOT NULL DEFAULT '0',
+`row_created_time` int(11) NOT NULL DEFAULT '0',
+`hash_key` varchar(255) NOT NULL DEFAULT '',
+`is_deleted` tinyint(4) DEFAULT '0',
+PRIMARY KEY (`id`),
+KEY `type_id` (`type`,`id`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED
+KEY_BLOCK_SIZE=8;
+insert into id_table values (1, 1, 10, '111', 0);
+insert into id_table values (2, 1, 10, '111', 1);
+insert into id_table values (3, 1, 10, '111', 0);
+insert into id_table values (4, 1, 10, '111', 1);
+insert into id_table values (5, 1, 10, '111', 0);
+insert into id_table values (6, 1, 10, '111', 1);
+insert into id_table values (7, 1, 10, '111', 0);
+insert into id_table values (8, 1, 10, '111', 1);
+insert into id_table values (9, 1, 10, '111', 0);
+insert into id_table values (10, 1, 10, '111', 1);
+CREATE TABLE `node_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+`update_time` int(10) unsigned NOT NULL DEFAULT '0',
+`data` mediumtext COLLATE latin1_bin NOT NULL,
+PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id',
+KEY `id` (`id`) COMMENT 'cf_node'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into node_table values (1, 1, 1, 10, 'data');
+insert into node_table values (2, 1, 1, 10, 'data');
+insert into node_table values (3, 1, 1, 10, 'data');
+insert into node_table values (4, 1, 1, 10, 'data');
+insert into node_table values (5, 1, 1, 10, 'data');
+insert into node_table values (6, 1, 1, 10, 'data');
+insert into node_table values (7, 1, 1, 10, 'data');
+insert into node_table values (8, 1, 1, 10, 'data');
+insert into node_table values (9, 1, 1, 10, 'data');
+insert into node_table values (10, 1, 1, 10, 'data');
+CREATE TABLE `count_table` (
+`id` bigint(20) unsigned NOT NULL DEFAULT '0',
+`type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`count` int(10) unsigned NOT NULL DEFAULT '0',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into count_table values (2, 1, 1, 1, 10, 20);
+insert into count_table values (3, 1, 1, 1, 10, 20);
+insert into count_table values (4, 1, 1, 1, 10, 20);
+insert into count_table values (5, 1, 1, 1, 10, 20);
+insert into count_table values (6, 1, 1, 1, 10, 20);
+insert into count_table values (7, 1, 1, 1, 10, 20);
+insert into count_table values (8, 1, 1, 1, 10, 20);
+insert into count_table values (9, 1, 1, 1, 10, 20);
+insert into count_table values (10, 1, 1, 1, 10, 20);
+CREATE TABLE `link_table5` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table3` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1);
+CREATE TABLE `link_table6` (
+`id1` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id1_type` int(10) unsigned NOT NULL DEFAULT '0',
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(4) NOT NULL DEFAULT '0',
+`data` text COLLATE latin1_bin NOT NULL,
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,
+`data`(255)) COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;
+insert into link_table6 values (1, 1, 2, 2, 1, 1,
+'data12_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 3, 2, 1, 2,
+'data13_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 4, 2, 1, 2,
+'data14_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (1, 1, 5, 2, 1, 1,
+'data15_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 1, 2, 1, 1,
+'data21_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 2, 2, 1, 1,
+'data22_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+insert into link_table6 values (2, 1, 3, 2, 1, 1,
+'data32_12345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890123456789012345678901234567890123456789'
+ '0123456789012345678901234567890', 1, 1);
+CREATE TABLE `link_table4` (
+`id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
+`raw_key` text COLLATE latin1_bin,
+`id2` bigint(20) unsigned NOT NULL DEFAULT '0',
+`id2_type` int(10) unsigned NOT NULL DEFAULT '0',
+`link_type` bigint(20) unsigned NOT NULL DEFAULT '0',
+`visibility` tinyint(3) NOT NULL DEFAULT '0',
+`data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '',
+`time` int(10) unsigned NOT NULL DEFAULT '0',
+`version` bigint(20) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link',
+KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`)
+COMMENT 'rev:cf_link_id1_type'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin
+ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8;
+insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1);
+insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1);
+insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1);
+insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1);
+insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1);
+insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1);
+insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1);
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version
+from link_table WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+# Point query
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1) and id2 IN (2) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+1 3 1 2 a11 125
+1 4 1 2 a11 125
+2 2 1 2 a10 125
+2 3 1 2 a11 125
+2 4 1 2 a11 125
+# Prefix range query
+# Prefix range query with SK
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME ASC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 2 3 3 a10 10 125
+# Prefix range query with SK with limits
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,10;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,5;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,1;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,10;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,5;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,1;
+id1 id2 link_type visibility data time version
+1 9 3 4 a14 14 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,10;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,5;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,1;
+id1 id2 link_type visibility data time version
+1 4 3 4 a11 11 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,0;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,10;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,5;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,1;
+id1 id2 link_type visibility data time version
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,0;
+id1 id2 link_type visibility data time version
+# Prefix range query with PK
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type
+1 10 3
+1 9 3
+1 8 3
+1 7 3
+1 6 3
+1 5 3
+1 4 3
+1 3 3
+1 2 3
+1 1 3
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type
+1 1 3
+1 2 3
+1 3 3
+1 4 3
+1 5 3
+1 6 3
+1 7 3
+1 8 3
+1 9 3
+1 10 3
+# Prefix range query with PK + value
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+id1 id2 link_type visibility data time version
+1 10 3 4 a15 15 125
+1 9 3 4 a14 14 125
+1 8 3 4 a13 13 125
+1 7 3 4 a12 12 125
+1 6 3 4 a12 12 125
+1 5 3 3 a12 12 125
+1 4 3 4 a11 11 125
+1 3 3 4 a11 11 125
+1 2 3 3 a10 10 125
+1 1 3 4 a10 10 125
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+id1 id2 link_type visibility data time version
+1 1 3 4 a10 10 125
+1 2 3 3 a10 10 125
+1 3 3 4 a11 11 125
+1 4 3 4 a11 11 125
+1 5 3 3 a12 12 125
+1 6 3 4 a12 12 125
+1 7 3 4 a12 12 125
+1 8 3 4 a13 13 125
+1 9 3 4 a14 14 125
+1 10 3 4 a15 15 125
+# Transaction
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 a10 125
+UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+COMMIT;
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 cde 125
+ROLLBACK;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+id1 id2 id1_type id2_type data version
+1 2 1 2 bcd 125
+# Data types
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1";
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True;
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01';
+id1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL;
+id1
+DROP TABLE count_table;
+DROP TABLE link_table;
+DROP TABLE link_table3;
+DROP TABLE link_table2;
+DROP TABLE id_table;
+DROP TABLE node_table;
+DROP TABLE link_table5;
+DROP TABLE link_table6;
+DROP TABLE link_table4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result
new file mode 100644
index 00000000000..12c5bc4f85c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/check_flags.result
@@ -0,0 +1,66 @@
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
+connect conn1, localhost, root,,;
+connection default;
+CREATE TABLE t1 (id INT, value INT, KEY (id), KEY (value)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (id INT, value INT) ENGINE=ROCKSDB;
+CREATE TABLE t3 (id INT, kp1 INT, PRIMARY KEY (id), KEY(kp1)) ENGINE=ROCKSDB COMMENT='ttl_duration=1';
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+INSERT INTO t2 SELECT * FROM t1;
+INSERT INTO t3 SELECT * FROM t1;
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi SIGNAL parked WAIT_FOR go';
+SELECT value FROM t1 WHERE value = 3;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi_scan SIGNAL parked WAIT_FOR go';
+SELECT DISTINCT(id) FROM t1 WHERE value = 5 AND id IN (1, 3, 5);
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_inwd SIGNAL parked WAIT_FOR go';
+SELECT value FROM t1 WHERE value > 3;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_rnwd SIGNAL parked WAIT_FOR go';
+SELECT id FROM t2;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+set debug_sync='RESET';
+connection conn1;
+set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go';
+SELECT kp1 FROM t3 ORDER BY kp1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+KILL QUERY $conn1_id;
+set debug_sync='now SIGNAL go';
+connection conn1;
+ERROR 70100: Query execution was interrupted
+connection default;
+disconnect conn1;
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result
index e5aeb57ebdf..1c45cfd09fe 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result
@@ -8,7 +8,7 @@ ERROR HY000: Table without primary key cannot be created outside mysql schema.
CREATE TABLE IF NOT EXISTS mysql_table_2 (a INT) ENGINE=ROCKSDB;
ERROR HY000: Table without primary key cannot be created outside mysql schema.
CREATE TABLE mysql_table_no_cols ENGINE=ROCKSDB;
-ERROR HY000: Table without primary key cannot be created outside mysql schema.
+ERROR 42000: A table must have at least 1 column
CREATE TABLE mysql.mysql_table_2 (a INT) ENGINE=ROCKSDB;
CREATE TABLE mysql_primkey (a INT PRIMARY KEY, b INT, c INT, d INT, INDEX (c)) ENGINE=ROCKSDB;
ALTER TABLE mysql_primkey DROP b, DROP a, ADD (f INT PRIMARY KEY);
@@ -29,10 +29,24 @@ DROP INDEX `PRIMARY` ON mysql_primkey4;
ERROR HY000: Table without primary key cannot be created outside mysql schema.
ALTER TABLE mysql.mysql_table ADD PRIMARY KEY (a);
ALTER TABLE mysql.mysql_table DROP PRIMARY KEY;
+SET default_storage_engine=ROCKSDB;
+CREATE TABLE mysql_noeng(a INT, b INT);
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+SET sql_mode="";
+CREATE TABLE mysql_noeng_sub(a INT, b INT) ENGINE=BOGUS_ENGINE;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
+CREATE TABLE mysql_primkey5 LIKE mysql_primkey;
+SET @@global.block_create_no_primary_key = false;
+CREATE TABLE mysql_no_primkey (a INT) ENGINE=ROCKSDB;
+SET @@global.block_create_no_primary_key = true;
+CREATE TABLE mysql_block_no_primkey LIKE mysql_no_primkey;
+ERROR HY000: Table without primary key cannot be created outside mysql schema.
DROP TABLE mysql_primkey;
DROP TABLE mysql_primkey2;
DROP TABLE mysql_primkey3;
DROP TABLE mysql_primkey4;
+DROP TABLE mysql_primkey5;
+DROP TABLE mysql_no_primkey;
USE mysql;
DROP TABLE mysql_table;
DROP TABLE mysql_table_2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
index 50733f81598..1e2636c873a 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
@@ -36,8 +36,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
alter table t1 modify i bigint;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -52,7 +52,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -89,8 +89,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
alter table t1 rename t1_new;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -105,7 +105,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
select * from t1_new;
i
1
@@ -143,8 +143,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop table t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -160,8 +160,8 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -193,8 +193,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop table t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -209,7 +209,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -241,9 +241,9 @@ connection: default (for show processlist)
# both con1 and con2 exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: con2
alter table t1 modify i bigint;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -259,9 +259,9 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -293,8 +293,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
create index idx1 on t1 (i);;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -309,7 +309,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -333,8 +333,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop index idx1 on t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -349,7 +349,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -381,8 +381,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
truncate t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -397,7 +397,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -429,8 +429,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -445,7 +445,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -469,8 +469,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop trigger ins_sum;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -485,7 +485,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -517,8 +517,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
optimize table t1;;
Table Op Msg_type Msg_text
@@ -537,7 +537,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 1;
@@ -569,8 +569,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
lock tables t1 write;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -585,7 +585,7 @@ set high_priority_ddl = 0;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
unlock tables;
drop user test_user1@localhost;
drop user test_user2@localhost;
@@ -628,8 +628,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
alter table t1 modify i bigint;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -637,7 +637,7 @@ alter high_priority table t1 modify i bigint;;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -674,8 +674,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
alter table t1 rename t1_new;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -683,7 +683,7 @@ alter high_priority table t1 rename t1_new;;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
select * from t1_new;
i
1
@@ -721,8 +721,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop table t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -731,8 +731,8 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -764,8 +764,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop table t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -773,7 +773,7 @@ drop high_priority table t1;;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -805,9 +805,9 @@ connection: default (for show processlist)
# both con1 and con2 exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: con2
alter table t1 modify i bigint;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -816,9 +816,9 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -850,8 +850,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
create index idx1 on t1 (i);;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -859,7 +859,7 @@ create high_priority index idx1 on t1 (i);;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -883,8 +883,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop index idx1 on t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -892,7 +892,7 @@ drop high_priority index idx1 on t1;;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -924,8 +924,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
truncate t1;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -933,7 +933,7 @@ truncate high_priority t1;;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -965,8 +965,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -974,7 +974,7 @@ create high_priority trigger ins_sum before insert on t1 for each row set @sum =
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -998,8 +998,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
drop trigger ins_sum;;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
@@ -1007,7 +1007,7 @@ drop high_priority trigger ins_sum;;
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
## Test parameters:
## use_sys_var = 0;
@@ -1039,8 +1039,8 @@ connection: default (for show processlist)
# both con1 and default exist
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
-<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
connection: default
optimize table t1;;
Table Op Msg_type Msg_text
@@ -1052,7 +1052,7 @@ test.t1 optimize status OK
connection: default (for show processlist)
show processlist;
Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
-<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> <SrvID>
drop user test_user1@localhost;
drop user test_user2@localhost;
drop table if exists t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/delete_before_lock.result b/storage/rocksdb/mysql-test/rocksdb/r/delete_before_lock.result
deleted file mode 100644
index a8ea5e1677f..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/r/delete_before_lock.result
+++ /dev/null
@@ -1,22 +0,0 @@
-connect con, localhost, root,,;
-connection default;
-set debug_sync='RESET';
-drop table if exists t1;
-create table t1 (id1 int, id2 int, value int, primary key (id1, id2)) engine=rocksdb;
-insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1), (2, 2, 2);
-connection con;
-set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
-update t1 set value=100 where id1=1;
-connection default;
-set debug_sync='now WAIT_FOR parked';
-delete from t1 where id1=1 and id2=1;
-set debug_sync='now SIGNAL go';
-connection con;
-select * from t1 where id1=1 for update;
-id1 id2 value
-1 2 100
-1 3 100
-connection default;
-disconnect con;
-set debug_sync='RESET';
-drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result b/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result
new file mode 100644
index 00000000000..4386ad590ae
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result
@@ -0,0 +1,38 @@
+create table t1 (
+pk int not null primary key,
+col1 varchar(10)
+) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3);
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+select * from t1 where pk=1;
+# testing unclean shutdown on stuck instance
+# Run shutdown sql command with forcing kill (exit code 127)
+shutdown 1;
+Got one of the listed errors
+# verifying exit code is printed
+# restart the server
+shutdown 230;
+Got one of the listed errors
+# restart the server
+# verifying SHUTDOWN is refused if exit code > 255
+SHUTDOWN 256;
+ERROR HY000: exit code must be 0..255
+SHUTDOWN 10000;
+ERROR HY000: exit code must be 0..255
+# verifying SHUTDOWN is refused if instances are not read only
+SHUTDOWN 0 read_only;
+ERROR HY000: Only read_only instance can be killed.
+SHUTDOWN 127 read_only;
+ERROR HY000: Only read_only instance can be killed.
+SHUTDOWN 127;
+Got one of the listed errors
+# restart the server
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+select * from t1 where pk=1;
+SET GLOBAL read_only=1;
+# verifying SHUTDOWN read_only works with read_only instance
+# Run shutdown sql command with forcing kill (exit code 127)
+shutdown 255 read_only;
+Got one of the listed errors
+# restart the server
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result b/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result
new file mode 100644
index 00000000000..7fede0ac603
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result
@@ -0,0 +1,3503 @@
+set global debug="+d,force_group_by";
+drop table if exists t1;
+create table t1 (
+a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(248) default ' '
+) engine=RocksDB;
+insert into t1 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4'),
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4'),
+('d','a','a','a411','xy1'),('d','a','a','b411','xy2'),('d','a','a','c411','xy3'),('d','a','a','d411','xy4'),
+('d','a','b','e412','xy1'),('d','a','b','f412','xy2'),('d','a','b','g412','xy3'),('d','a','b','h412','xy4'),
+('d','b','a','i421','xy1'),('d','b','a','j421','xy2'),('d','b','a','k421','xy3'),('d','b','a','l421','xy4'),
+('d','b','b','m422','xy1'),('d','b','b','n422','xy2'),('d','b','b','o422','xy3'),('d','b','b','p422','xy4');
+create index idx_t1_0 on t1 (a1);
+create index idx_t1_1 on t1 (a1,a2,b,c);
+create index idx_t1_2 on t1 (a1,a2,b);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+drop table if exists t2;
+create table t2 (
+a1 char(64), a2 char(64) not null, b char(16), c char(16), d char(16), dummy char(248) default ' '
+) engine=RocksDB;
+insert into t2 select * from t1;
+insert into t2 (a1, a2, b, c, d) values
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),
+('a','a',NULL,'a777','xyz'),('a','a',NULL,'a888','xyz'),('a','a',NULL,'a999','xyz'),
+('a','a','a',NULL,'xyz'),
+('a','a','b',NULL,'xyz'),
+('a','b','a',NULL,'xyz'),
+('c','a',NULL,'c777','xyz'),('c','a',NULL,'c888','xyz'),('c','a',NULL,'c999','xyz'),
+('d','b','b',NULL,'xyz'),
+('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),('e','a','a',NULL,'xyz'),
+('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz'),('e','a','b',NULL,'xyz');
+create index idx_t2_0 on t2 (a1);
+create index idx_t2_1 on t2 (a1,a2,b,c);
+create index idx_t2_2 on t2 (a1,a2,b);
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+drop table if exists t3;
+create table t3 (
+a1 char(1), a2 char(1), b char(1), c char(4) not null, d char(3), dummy char(1) default ' '
+) engine=RocksDB;
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+insert into t3 (a1, a2, b, c, d) values
+('a','a','a','a111','xy1'),('a','a','a','b111','xy2'),('a','a','a','c111','xy3'),('a','a','a','d111','xy4'),
+('a','a','b','e112','xy1'),('a','a','b','f112','xy2'),('a','a','b','g112','xy3'),('a','a','b','h112','xy4'),
+('a','b','a','i121','xy1'),('a','b','a','j121','xy2'),('a','b','a','k121','xy3'),('a','b','a','l121','xy4'),
+('a','b','b','m122','xy1'),('a','b','b','n122','xy2'),('a','b','b','o122','xy3'),('a','b','b','p122','xy4'),
+('b','a','a','a211','xy1'),('b','a','a','b211','xy2'),('b','a','a','c211','xy3'),('b','a','a','d211','xy4'),
+('b','a','b','e212','xy1'),('b','a','b','f212','xy2'),('b','a','b','g212','xy3'),('b','a','b','h212','xy4'),
+('b','b','a','i221','xy1'),('b','b','a','j221','xy2'),('b','b','a','k221','xy3'),('b','b','a','l221','xy4'),
+('b','b','b','m222','xy1'),('b','b','b','n222','xy2'),('b','b','b','o222','xy3'),('b','b','b','p222','xy4'),
+('c','a','a','a311','xy1'),('c','a','a','b311','xy2'),('c','a','a','c311','xy3'),('c','a','a','d311','xy4'),
+('c','a','b','e312','xy1'),('c','a','b','f312','xy2'),('c','a','b','g312','xy3'),('c','a','b','h312','xy4'),
+('c','b','a','i321','xy1'),('c','b','a','j321','xy2'),('c','b','a','k321','xy3'),('c','b','a','l321','xy4'),
+('c','b','b','m322','xy1'),('c','b','b','n322','xy2'),('c','b','b','o322','xy3'),('c','b','b','p322','xy4');
+create index idx_t3_0 on t3 (a1);
+create index idx_t3_1 on t3 (a1,a2,b,c);
+create index idx_t3_2 on t3 (a1,a2,b);
+analyze table t3;
+Table Op Msg_type Msg_text
+test.t3 analyze status OK
+explain select a1, min(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by
+explain select a1, max(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 65 NULL 126 Using index for group-by
+explain select a1, min(a2), max(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by
+explain select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 # NULL # Using index for group-by
+explain select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by
+explain select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by
+explain select min(a2) from t1 group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using index for group-by
+explain select a2, min(c), max(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by
+select a1, min(a2) from t1 group by a1;
+a1 min(a2)
+a a
+b a
+c a
+d a
+select a1, max(a2) from t1 group by a1;
+a1 max(a2)
+a b
+b b
+c b
+d b
+select a1, min(a2), max(a2) from t1 group by a1;
+a1 min(a2) max(a2)
+a a b
+b a b
+c a b
+d a b
+select a1, a2, b, min(c), max(c) from t1 group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,max(c),min(c) from t1 group by a1,a2,b;
+a1 a2 b max(c) min(c)
+a a a d111 a111
+a a b h112 e112
+a b a l121 i121
+a b b p122 m122
+b a a d211 a211
+b a b h212 e212
+b b a l221 i221
+b b b p222 m222
+c a a d311 a311
+c a b h312 e312
+c b a l321 i321
+c b b p322 m322
+d a a d411 a411
+d a b h412 e412
+d b a l421 i421
+d b b p422 m422
+select a1,a2,b,max(c),min(c) from t2 group by a1,a2,b;
+a1 a2 b max(c) min(c)
+a a NULL a999 a777
+a a a d111 a111
+a a b h112 e112
+a b a l121 i121
+a b b p122 m122
+b a a d211 a211
+b a b h212 e212
+b b a l221 i221
+b b b p222 m222
+c a NULL c999 c777
+c a a d311 a311
+c a b h312 e312
+c b a l321 i321
+c b b p322 m322
+d a a d411 a411
+d a b h412 e412
+d b a l421 i421
+d b b p422 m422
+e a a NULL NULL
+e a b NULL NULL
+select min(a2), a1, max(a2), min(a2), a1 from t1 group by a1;
+min(a2) a1 max(a2) min(a2) a1
+a a b a a
+a b b a b
+a c b a c
+a d b a d
+select a1, b, min(c), a1, max(c), b, a2, max(c), max(c) from t1 group by a1, a2, b;
+a1 b min(c) a1 max(c) b a2 max(c) max(c)
+a a a111 a d111 a a d111 d111
+a b e112 a h112 b a h112 h112
+a a i121 a l121 a b l121 l121
+a b m122 a p122 b b p122 p122
+b a a211 b d211 a a d211 d211
+b b e212 b h212 b a h212 h212
+b a i221 b l221 a b l221 l221
+b b m222 b p222 b b p222 p222
+c a a311 c d311 a a d311 d311
+c b e312 c h312 b a h312 h312
+c a i321 c l321 a b l321 l321
+c b m322 c p322 b b p322 p322
+d a a411 d d411 a a d411 d411
+d b e412 d h412 b a h412 h412
+d a i421 d l421 a b l421 l421
+d b m422 d p422 b b p422 p422
+select min(a2) from t1 group by a1;
+min(a2)
+a
+a
+a
+a
+select a2, min(c), max(c) from t1 group by a1,a2,b;
+a2 min(c) max(c)
+a a111 d111
+a e112 h112
+b i121 l121
+b m122 p122
+a a211 d211
+a e212 h212
+b i221 l221
+b m222 p222
+a a311 d311
+a e312 h312
+b i321 l321
+b m322 p322
+a a411 d411
+a e412 h412
+b i421 l421
+b m422 p422
+explain select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1002 Using where; Using index for group-by
+explain select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1002 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1503 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1503 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 2004 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 2004 Using where; Using index for group-by
+explain select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 1503 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+select a1,a2,b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1, max(c) from t1 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 max(c)
+a d111
+a h112
+a l121
+a p122
+c d311
+c h312
+c l321
+c p322
+d d411
+d h412
+d l421
+d p422
+select a1,a2,b,min(c),max(c) from t1 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+b a a a211 d211
+b a b e212 h212
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b max(c)
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b max(c)
+a b a l121
+a b b p122
+b b a l221
+b b b p222
+c b a l321
+c b b p322
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+a b b m122 p122
+b b a i221 l221
+b b b m222 p222
+c b a i321 l321
+c b b m322 p322
+d b a i421 l421
+d b b m422 p422
+select a1,min(c),max(c) from t1 where a1 >= 'b' group by a1,a2,b;
+a1 min(c) max(c)
+b a211 d211
+b e212 h212
+b i221 l221
+b m222 p222
+c a311 d311
+c e312 h312
+c i321 l321
+c m322 p322
+d a411 d411
+d e412 h412
+d i421 l421
+d m422 p422
+select a1, max(c) from t1 where a1 in ('a','b','d') group by a1,a2,b;
+a1 max(c)
+a d111
+a h112
+a l121
+a p122
+b d211
+b h212
+b l221
+b p222
+d d411
+d h412
+d l421
+d p422
+select a1,a2,b, max(c) from t2 where a1 < 'd' group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+select a1,a2,b,min(c),max(c) from t2 where a1 < 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+e a a NULL NULL
+e a b NULL NULL
+select a1,a2,b, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+e a a NULL
+e a b NULL
+select a1, max(c) from t2 where a1 >= 'c' or a1 < 'b' group by a1,a2,b;
+a1 max(c)
+a a999
+a d111
+a h112
+a l121
+a p122
+c c999
+c d311
+c h312
+c l321
+c p322
+d d411
+d h412
+d l421
+d p422
+e NULL
+e NULL
+select a1,a2,b,min(c),max(c) from t2 where a1 >= 'c' or a2 < 'b' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+b a a a211 d211
+b a b e212 h212
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+e a a NULL NULL
+e a b NULL NULL
+select a1,a2,b, max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b max(c)
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where a1 = 'z' or a1 = 'b' or a1 = 'd' group by a1,a2,b;
+a1 a2 b min(c) max(c)
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b max(c)
+a b a l121
+a b b p122
+b b a l221
+b b b p222
+c b a l321
+c b b p322
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+a b b m122 p122
+b b a i221 l221
+b b b m222 p222
+c b a i321 l321
+c b b m322 p322
+d b a i421 l421
+d b b m422 p422
+select a1,min(c),max(c) from t2 where a1 >= 'b' group by a1,a2,b;
+a1 min(c) max(c)
+b a211 d211
+b e212 h212
+b i221 l221
+b m222 p222
+c c777 c999
+c a311 d311
+c e312 h312
+c i321 l321
+c m322 p322
+d a411 d411
+d e412 h412
+d i421 l421
+d m422 p422
+e NULL NULL
+e NULL NULL
+select a1, max(c) from t2 where a1 in ('a','b','d') group by a1,a2,b;
+a1 max(c)
+a a999
+a d111
+a h112
+a l121
+a p122
+b d211
+b h212
+b l221
+b p222
+d d411
+d h412
+d l421
+d p422
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by
+explain select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by
+explain select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 range idx_t3_0,idx_t3_1,idx_t3_2 idx_t3_1 6 NULL 126 Using where; Using index for group-by
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 e112
+b a b h212 e212
+c a b h312 e312
+d a b h412 e412
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a b b p122 e112
+b b b p222 e212
+c b b p322 e312
+d b b p422 e412
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 a111
+b a b h212 a211
+c a b h312 a311
+d a b h412 a411
+select a1,a2,b,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a b b p122 a111
+b b b p222 a211
+c b b p322 a311
+d b b p422 a411
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a h112 e112
+b h212 e212
+c h312 e312
+d h412 e412
+select a1,max(c),min(c) from t1 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a p122 e112
+b p222 e212
+c p322 e312
+d p422 e412
+select a1,max(c),min(c) from t1 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 max(c) min(c)
+a h112 a111
+b h212 a211
+c h312 a311
+d h412 a411
+select a1,a2,b, max(c) from t1 where (b = 'b') group by a1,a2;
+a1 a2 b max(c)
+a a b h112
+a b b p122
+b a b h212
+b b b p222
+c a b h312
+c b b p322
+d a b h412
+d b b p422
+select a1,a2,b, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b max(c)
+a a b h112
+a b b p122
+b a b h212
+b b b p222
+c a b h312
+c b b p322
+d a b h412
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+a b b m122 p122
+b a b e212 h212
+b b b m222 p222
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a b a111 h112
+a b b i121 p122
+b a b a211 h212
+b b b i221 p222
+c a b a311 h312
+c b b i321 p322
+d a b a411 h412
+d b b i421 p422
+select a1,a2, max(c) from t1 where (b = 'b') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+select a1,a2, max(c) from t1 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 e112
+b a b h212 e212
+c a b h312 e312
+d a b h412 e412
+e a b NULL NULL
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a b b p122 e112
+b b b p222 e212
+c b b p322 e312
+d b b p422 e412
+select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 a111
+b a b h212 a211
+c a b h312 a311
+d a b h412 a411
+e a b NULL NULL
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a h112 e112
+b h212 e212
+c h312 e312
+d h412 e412
+e NULL NULL
+select a1,max(c),min(c) from t2 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a p122 e112
+b p222 e212
+c p322 e312
+d p422 e412
+select a1,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 max(c) min(c)
+a h112 a111
+b h212 a211
+c h312 a311
+d h412 a411
+e NULL NULL
+select a1,a2,b, max(c) from t2 where (b = 'b') group by a1,a2;
+a1 a2 b max(c)
+a a b h112
+a b b p122
+b a b h212
+b b b p222
+c a b h312
+c b b p322
+d a b h412
+d b b p422
+e a b NULL
+select a1,a2,b, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b max(c)
+a a b h112
+a b b p122
+b a b h212
+b b b p222
+c a b h312
+c b b p322
+d a b h412
+d b b p422
+e a b NULL
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+a b b m122 p122
+b a b e212 h212
+b b b m222 p222
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+e a b NULL NULL
+select a1,a2,b,min(c),max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 b min(c) max(c)
+a a b a111 h112
+a b b i121 p122
+b a b a211 h212
+b b b i221 p222
+c a b a311 h312
+c b b i321 p322
+d a b a411 h412
+d b b i421 p422
+e a b NULL NULL
+select a1,a2, max(c) from t2 where (b = 'b') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+e a NULL
+select a1,a2, max(c) from t2 where (b = 'b' or b = 'a') group by a1,a2;
+a1 a2 max(c)
+a a h112
+a b p122
+b a h212
+b b p222
+c a h312
+c b p322
+d a h412
+d b p422
+e a NULL
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 e112
+b a b h212 e212
+c a b h312 e312
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 a2 b max(c) min(c)
+a b b p122 e112
+b b b p222 e212
+c b b p322 e312
+select a1,a2,b,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 a2 b max(c) min(c)
+a a b h112 a111
+b a b h212 a211
+c a b h312 a311
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a h112 e112
+b h212 e212
+c h312 e312
+select a1,max(c),min(c) from t3 where (a2 = 'a' or a2 = 'b') and (b = 'b') group by a1;
+a1 max(c) min(c)
+a p122 e112
+b p222 e212
+c p322 e312
+select a1,max(c),min(c) from t3 where (a2 = 'a') and (b = 'b' or b = 'a') group by a1;
+a1 max(c) min(c)
+a h112 a111
+b h212 a211
+c h312 a311
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 126 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL 251 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 251 Using where; Using index for group-by
+select a1,a2,b,min(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+a1 a2 b min(c)
+a a NULL a777
+c a NULL c777
+select a1,a2,b,min(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+a1 a2 b min(c)
+select a1,a2,b,max(c) from t2 where (a2 = 'a') and b is NULL group by a1;
+a1 a2 b max(c)
+a a NULL a999
+c a NULL c999
+select a1,a2,b,max(c) from t2 where (a2 = 'a' or a2 = 'b') and b is NULL group by a1;
+a1 a2 b max(c)
+select a1,a2,b,min(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b min(c)
+a a NULL a777
+c a NULL c777
+select a1,a2,b,max(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b max(c)
+a a NULL a999
+c a NULL c999
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+c a NULL c777 c999
+select a1,a2,b,min(c),max(c) from t2 where b is NULL group by a1,a2;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+c a NULL c777 c999
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+select a1,a2,b, max(c) from t1 where (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (c > 'f123') group by a1,a2,b;
+a1 a2 b max(c)
+a a b h112
+a b a l121
+a b b p122
+b a b h212
+b b a l221
+b b b p222
+c a b h312
+c b a l321
+c b b p322
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'f123') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b g112 h112
+a b a i121 l121
+a b b m122 p122
+b a b f212 h212
+b b a i221 l221
+b b b m222 p222
+c a b f312 h312
+c b a i321 l321
+c b b m322 p322
+d a b f412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (c < 'a0') group by a1,a2,b;
+a1 a2 b max(c)
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+select a1,a2,b, max(c) from t1 where (c < 'k321') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a k121
+b a a d211
+b a b h212
+b b a k221
+c a a d311
+c a b h312
+c b a j321
+d a a d411
+d a b h412
+d b a j421
+select a1,a2,b,min(c),max(c) from t1 where (c < 'k321') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 k121
+b a a a211 d211
+b a b e212 h212
+b b a i221 k221
+c a a a311 d311
+c a b e312 h312
+c b a i321 j321
+d a a a411 d411
+d a b e412 h412
+d b a i421 j421
+select a1,a2,b, max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 g112
+b a a b211 d211
+b a b e212 f212
+c a a b311 d311
+c a b e312 f312
+d a a b411 d411
+d a b e412 f412
+select a1,a2,b,min(c),max(c) from t1 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a a111 c111
+b a a a211 c211
+c a a a311 c311
+d a a a411 c411
+d a b g412 g412
+d b a k421 k421
+select a1,a2,b,min(c),max(c) from t1 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a a b311 d311
+c a b e312 h312
+d a a b411 d411
+d a b e412 h412
+select a1,a2,b,min(c),max(c) from t1 where (c between 'b111' and 'g112') or (c between 'd000' and 'i110') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a a b311 d311
+c a b e312 h312
+d a a b411 d411
+d a b e412 h412
+select a1,a2,b, max(c) from t2 where (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (c > 'f123') group by a1,a2,b;
+a1 a2 b max(c)
+a a b h112
+a b a l121
+a b b p122
+b a b h212
+b b a l221
+b b b p222
+c a b h312
+c b a l321
+c b b p322
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'f123') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b g112 h112
+a b a i121 l121
+a b b m122 p122
+b a b f212 h212
+b b a i221 l221
+b b b m222 p222
+c a b f312 h312
+c b a i321 l321
+c b b m322 p322
+d a b f412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (c < 'a0') group by a1,a2,b;
+a1 a2 b max(c)
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+select a1,a2,b, max(c) from t2 where (c < 'k321') group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a k121
+b a a d211
+b a b h212
+b b a k221
+c a NULL c999
+c a a d311
+c a b h312
+c b a j321
+d a a d411
+d a b h412
+d b a j421
+select a1,a2,b,min(c),max(c) from t2 where (c < 'k321') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+a b a i121 k121
+b a a a211 d211
+b a b e212 h212
+b b a i221 k221
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 j321
+d a a a411 d411
+d a b e412 h412
+d b a i421 j421
+select a1,a2,b, max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b max(c)
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c < 'a0') or (c > 'b1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a b111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a b211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b max(c)
+a a NULL a999
+a a a d111
+a a b h112
+a b a l121
+a b b p122
+b a a d211
+b a b h212
+b b a l221
+b b b p222
+c a NULL c999
+c a a d311
+c a b h312
+c b a l321
+c b b p322
+d a a d411
+d a b h412
+d b a l421
+d b b p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 d111
+a a b e112 h112
+a b a i121 l121
+a b b m122 p122
+b a a a211 d211
+b a b e212 h212
+b b a i221 l221
+b b b m222 p222
+c a NULL c777 c999
+c a a a311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a a411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t2 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 g112
+b a a b211 d211
+b a b e212 f212
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 f312
+d a a b411 d411
+d a b e412 f412
+select a1,a2,b,min(c),max(c) from t2 where (c < 'c5') or (c = 'g412') or (c = 'k421') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a NULL a777 a999
+a a a a111 c111
+b a a a211 c211
+c a a a311 c311
+d a a a411 c411
+d a b g412 g412
+d b a k421 k421
+select a1,a2,b,min(c),max(c) from t2 where ((c > 'b111') and (c <= 'g112')) or ((c > 'd000') and (c <= 'i110')) group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+d a a b411 d411
+d a b e412 h412
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c = t1.c )
+group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 Using where; Using index
+2 DEPENDENT SUBQUERY t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,min(c),max(c) from t1
+where exists ( select * from t2 where t2.c > 'b1' )
+group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by
+2 SUBQUERY t2 index NULL idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1002 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1002 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+b a b e212 h212
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t1 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+b b a i221 l221
+c b a i321 l321
+d b a i421 l421
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t1 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b min(c)
+select a1,a2,b,min(c) from t1 where (ord(a1) > 97) and (ord(a2) + ord(a1) > 194) and (b = 'c') group by a1,a2,b;
+a1 a2 b min(c)
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a b e112 h112
+b a b e212 h212
+c a b e312 h312
+c b b m322 p322
+d a b e412 h412
+d b b m422 p422
+e a b NULL NULL
+select a1,a2,b,min(c),max(c) from t2 where (a1 >= 'c' or a2 < 'b') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a a a c111 d111
+a a b e112 h112
+b a a b211 d211
+b a b e212 h212
+c a NULL c777 c999
+c a a b311 d311
+c a b e312 h312
+c b a i321 l321
+c b b m322 p322
+d a a b411 d411
+d a b e412 h412
+d b a i421 l421
+d b b m422 p422
+select a1,a2,b,min(c),max(c) from t2 where (a2 >= 'b') and (b = 'a') and (c > 'b111') group by a1,a2,b;
+a1 a2 b min(c) max(c)
+a b a i121 l121
+b b a i221 l221
+c b a i321 l321
+d b a i421 l421
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c < 'h112') or (c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t2 where ((a1 > 'a') or (a1 < '9')) and ((a2 >= 'b') and (a2 < 'z')) and (b = 'a') and ((c = 'j121') or (c > 'k121' and c < 'm122') or (c > 'o122') or (c < 'h112') or (c = 'c111')) group by a1,a2,b;
+a1 a2 b min(c)
+b b a k221
+c b a k321
+d b a k421
+select a1,a2,b,min(c) from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b min(c)
+explain select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+select a1,a2,b from t1 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b
+a a b
+b a b
+c a b
+c b b
+d a b
+d b b
+select a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+select a1,a2,b from t2 where (a1 >= 'c' or a2 < 'b') and (b > 'a') group by a1,a2,b;
+a1 a2 b
+a a b
+b a b
+c a b
+c b b
+d a b
+d b b
+e a b
+select a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121' or c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+explain select distinct a1,a2,b from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_1 idx_t1_1 163 NULL 1001 99.90 Using where; Using index for group-by
+Warnings:
+Note 1003 /* select#1 */ select distinct `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where ((`test`.`t1`.`c` = 'i121') and (`test`.`t1`.`b` = 'a') and (`test`.`t1`.`a2` >= 'b'))
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 Using where; Using index
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by
+explain select distinct a1,a2,b from t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using index for group-by
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 range idx_t2_1 idx_t2_1 163 NULL 1001 99.90 Using where; Using index for group-by
+Warnings:
+Note 1003 /* select#1 */ select distinct `test`.`t2`.`a1` AS `a1`,`test`.`t2`.`a2` AS `a2`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c` from `test`.`t2` where ((`test`.`t2`.`c` = 'i121') and (`test`.`t2`.`b` = 'a') and (`test`.`t2`.`a2` >= 'b'))
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_1,idx_t2_2 idx_t2_2 146 NULL 1000 Using where; Using index
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL 252 Using where; Using index for group-by
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL 252 Using where; Using index for group-by
+select distinct a1,a2,b from t1;
+a1 a2 b
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a');
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+a1 a2 b
+select distinct b from t1 where (a2 >= 'b') and (b = 'a');
+b
+a
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b';
+a1
+a
+d
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e';
+a1
+select distinct a1,a2,b from t2;
+a1 a2 b
+a a NULL
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a NULL
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+e a a
+e a b
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a');
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+a1 a2 b
+select distinct b from t2 where (a2 >= 'b') and (b = 'a');
+b
+a
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b';
+a1
+a
+d
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e';
+a1
+select distinct t_00.a1
+from t1 t_00
+where exists ( select * from t2 where a1 = t_00.a1 );
+a1
+a
+b
+c
+d
+select distinct a1,a1 from t1;
+a1 a1
+a a
+b b
+c c
+d d
+select distinct a2,a1,a2,a1 from t1;
+a2 a1 a2 a1
+a a a a
+b a b a
+a b a b
+b b b b
+a c a c
+b c b c
+a d a d
+b d b d
+select distinct t1.a1,t2.a1 from t1,t2;
+a1 a1
+a a
+b a
+c a
+d a
+a b
+b b
+c b
+d b
+a c
+b c
+c c
+d c
+a d
+b d
+c d
+d d
+a e
+b e
+c e
+d e
+explain select distinct a1,a2,b from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using index for group-by
+explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 501 Using where; Using index for group-by
+explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by; Using temporary; Using filesort
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by
+explain select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 252 Using where; Using index for group-by
+explain select distinct a1,a2,b from t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using index for group-by
+explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 163 NULL # Using where; Using index for group-by
+explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by
+explain select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_1,idx_t2_2 idx_t2_1 146 NULL # Using where; Using index for group-by; Using temporary; Using filesort
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL # Using where; Using index for group-by
+explain select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 129 NULL # Using where; Using index for group-by
+select distinct a1,a2,b from t1;
+a1 a2 b
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+select distinct b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+b
+a
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+a1
+a
+d
+select distinct a1 from t1 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+a1
+select distinct a1,a2,b from t2;
+a1 a2 b
+a a NULL
+a a a
+a a b
+a b a
+a b b
+b a a
+b a b
+b b a
+b b b
+c a NULL
+c a a
+c a b
+c b a
+c b b
+d a a
+d a b
+d b a
+d b b
+e a a
+e a b
+select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+a1 a2 b
+a b a
+b b a
+c b a
+d b a
+select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121') group by a1,a2,b;
+a1 a2 b c
+a b a i121
+select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1,a2,b;
+a1 a2 b
+select distinct b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b;
+b
+a
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'b' group by a1;
+a1
+a
+d
+select distinct a1 from t2 where a1 in ('a', 'd') and a2 = 'e' group by a1;
+a1
+explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by (scanning)
+explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1 idx_t1_1 163 NULL 1001 Using where; Using index for group-by (scanning)
+explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 100.00 Using where; Using index for group-by (scanning)
+Warnings:
+Note 1003 /* select#1 */ select count(distinct `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`) AS `count(distinct a1,a2,b)` from `test`.`t1` where ((`test`.`t1`.`b` = 'c') and (`test`.`t1`.`a1` > 'a') and (`test`.`t1`.`a2` > 'a'))
+explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 Using where; Using index
+explain extended select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 100.00 Using where; Using index for group-by (scanning)
+Warnings:
+Note 1003 /* select#1 */ select (98 + count(distinct `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`)) AS `98 + count(distinct a1,a2,b)` from `test`.`t1` where ((`test`.`t1`.`a1` > 'a') and (`test`.`t1`.`a2` > 'a'))
+select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
+count(distinct a1,a2,b)
+4
+select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
+count(distinct a1,a2,b,c)
+1
+select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
+count(distinct a1,a2,b)
+0
+select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
+count(distinct b)
+1
+select 98 + count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a');
+98 + count(distinct a1,a2,b)
+104
+explain select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 501 Using where; Using index for group-by
+explain select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 251 Using index for group-by
+select a1,a2,b, concat(min(c), max(c)) from t1 where a1 < 'd' group by a1,a2,b;
+a1 a2 b concat(min(c), max(c))
+a a a a111d111
+a a b e112h112
+a b a i121l121
+a b b m122p122
+b a a a211d211
+b a b e212h212
+b b a i221l221
+b b b m222p222
+c a a a311d311
+c a b e312h312
+c b a i321l321
+c b b m322p322
+select concat(a1,min(c)),b from t1 where a1 < 'd' group by a1,a2,b;
+concat(a1,min(c)) b
+aa111 a
+ae112 b
+ai121 a
+am122 b
+ba211 a
+be212 b
+bi221 a
+bm222 b
+ca311 a
+ce312 b
+ci321 a
+cm322 b
+select concat(a1,min(c)),b,max(c) from t1 where a1 < 'd' group by a1,a2,b;
+concat(a1,min(c)) b max(c)
+aa111 a d111
+ae112 b h112
+ai121 a l121
+am122 b p122
+ba211 a d211
+be212 b h212
+bi221 a l221
+bm222 b p222
+ca311 a d311
+ce312 b h312
+ci321 a l321
+cm322 b p322
+select concat(a1,a2),b,min(c),max(c) from t1 where a1 < 'd' group by a1,a2,b;
+concat(a1,a2) b min(c) max(c)
+aa a a111 d111
+aa b e112 h112
+ab a i121 l121
+ab b m122 p122
+ba a a211 d211
+ba b e212 h212
+bb a i221 l221
+bb b m222 p222
+ca a a311 d311
+ca b e312 h312
+cb a i321 l321
+cb b m322 p322
+select concat(ord(min(b)),ord(max(b))),min(b),max(b) from t1 group by a1,a2;
+concat(ord(min(b)),ord(max(b))) min(b) max(b)
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+9798 a b
+explain select a1,a2,b,d,min(c),max(c) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 NULL
+explain select a1,a2,b,d from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 NULL
+explain extended select a1,a2,min(b),max(b) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,min(`test`.`t1`.`b`) AS `min(b)`,max(`test`.`t1`.`b`) AS `max(b)` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`c` > 'a111')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2`
+explain extended select a1,a2,b,min(c),max(c) from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 100.00 Using where
+Warnings:
+Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,min(`test`.`t1`.`c`) AS `min(c)`,max(`test`.`t1`.`c`) AS `max(c)` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`d` > 'xy2')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+explain extended select a1,a2,b,c from t1
+where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 100.00 Using where
+Warnings:
+Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`d` > 'xy2')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`,`test`.`t1`.`c`
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b < 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b <= 'b' and b >= 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where
+explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 163 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b` from `test`.`t1` where (((`test`.`t1`.`a1` = 'b') or (`test`.`t1`.`a1` = 'd') or (`test`.`t1`.`a1` = 'a') or (`test`.`t1`.`a1` = 'c')) and (`test`.`t1`.`a2` > 'a') and (`test`.`t1`.`c` > 'a111')) group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where
+select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1;
+a1 a2 min(b) c
+a a a a111
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b = 'a') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where
+explain select a1,a2,b,min(c),max(c) from t2
+where (c > 'a000') and (c <= 'd999') and (c like '_8__') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 1000 Using where; Using index
+explain select a1, a2, b, c, min(d), max(d) from t1 group by a1,a2,b,c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index idx_t1_1 idx_t1_1 163 NULL 1000 NULL
+explain select a1,a2,count(a2) from t1 group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 Using index
+explain extended select a1,a2,count(a2) from t1 where (a1 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,count(`test`.`t1`.`a2`) AS `count(a2)` from `test`.`t1` where (`test`.`t1`.`a1` > 'a') group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+explain extended select sum(ord(a1)) from t1 where (a1 > 'a') group by a1,a2,b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 1000 100.00 Using where; Using index
+Warnings:
+Note 1003 /* select#1 */ select sum(ord(`test`.`t1`.`a1`)) AS `sum(ord(a1))` from `test`.`t1` where (`test`.`t1`.`a1` > 'a') group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`
+set optimizer_switch = 'multi_range_groupby=off';
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_0 65 NULL 1000 Using where
+set optimizer_switch = 'default';
+explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'a' or b = 'b') group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range idx_t2_0,idx_t2_1,idx_t2_2 idx_t2_1 163 NULL 126 Using where; Using index for group-by
+explain select distinct(a1) from t1 where ord(a2) = 98;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_0 65 NULL 1000 Using where
+select distinct(a1) from t1 where ord(a2) = 98;
+a1
+a
+b
+c
+d
+explain select a1 from t1 where a2 = 'b' group by a1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using where; Using index for group-by
+select a1 from t1 where a2 = 'b' group by a1;
+a1
+a
+b
+c
+d
+explain select distinct a1 from t1 where a2 = 'b';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 126 Using where; Using index for group-by
+select distinct a1 from t1 where a2 = 'b';
+a1
+a
+b
+c
+d
+drop table t1,t2,t3;
+create table t1 (c1 int not null,c2 int not null, primary key(c1,c2)) engine=RocksDB;
+insert into t1 (c1,c2) values
+(10,1),(10,2),(10,3),(20,4),(20,5),(20,6),(30,7),(30,8),(30,9);
+select distinct c1, c2 from t1 order by c2;
+c1 c2
+10 1
+10 2
+10 3
+20 4
+20 5
+20 6
+30 7
+30 8
+30 9
+select c1,min(c2) as c2 from t1 group by c1 order by c2;
+c1 c2
+10 1
+20 4
+30 7
+select c1,c2 from t1 group by c1,c2 order by c2;
+c1 c2
+10 1
+10 2
+10 3
+20 4
+20 5
+20 6
+30 7
+30 8
+30 9
+drop table t1;
+CREATE TABLE t1 (a varchar(5), b int(11), PRIMARY KEY (a,b)) engine=RocksDB;
+INSERT INTO t1 VALUES ('AA',1), ('AA',2), ('AA',3), ('BB',1), ('AA',4);
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+a
+AA
+SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+a
+BB
+EXPLAIN SELECT a FROM t1 WHERE a='AA' GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref PRIMARY PRIMARY 7 const 1000 Using where; Using index
+EXPLAIN SELECT a FROM t1 WHERE a='BB' GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref PRIMARY PRIMARY 7 const 1000 Using where; Using index
+SELECT DISTINCT a FROM t1 WHERE a='BB';
+a
+BB
+SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%';
+a
+BB
+SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a;
+a
+BB
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int(11) NOT NULL DEFAULT '0',
+b varchar(16) COLLATE latin1_general_ci NOT NULL DEFAULT '',
+PRIMARY KEY (a,b)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;
+CREATE PROCEDURE a(x INT)
+BEGIN
+DECLARE rnd INT;
+DECLARE cnt INT;
+WHILE x > 0 DO
+SET rnd= x % 100;
+SET cnt = (SELECT COUNT(*) FROM t1 WHERE a = rnd);
+INSERT INTO t1(a,b) VALUES (rnd, CAST(cnt AS CHAR));
+SET x= x - 1;
+END WHILE;
+END|
+CALL a(1000);
+SELECT a FROM t1 WHERE a=0;
+a
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+SELECT DISTINCT a FROM t1 WHERE a=0;
+a
+0
+SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0;
+COUNT(DISTINCT a)
+1
+DROP TABLE t1;
+DROP PROCEDURE a;
+CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a)) engine=RocksDB;
+INSERT INTO t1 (a) VALUES
+(''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
+('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
+('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
+EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY PRIMARY 66 NULL 1001 Using index for group-by
+SELECT DISTINCT a,a FROM t1 ORDER BY a;
+a a
+
+CENTRAL CENTRAL
+EASTERN EASTERN
+GREATER LONDON GREATER LONDON
+NORTH CENTRAL NORTH CENTRAL
+NORTH EAST NORTH EAST
+NORTH WEST NORTH WEST
+SCOTLAND SCOTLAND
+SOUTH EAST SOUTH EAST
+SOUTH WEST SOUTH WEST
+WESTERN WESTERN
+DROP TABLE t1;
+CREATE TABLE t1 (id1 INT, id2 INT) engine=RocksDB;
+CREATE TABLE t2 (id2 INT, id3 INT, id5 INT) engine=RocksDB;
+CREATE TABLE t3 (id3 INT, id4 INT) engine=RocksDB;
+CREATE TABLE t4 (id4 INT) engine=RocksDB;
+CREATE TABLE t5 (id5 INT, id6 INT) engine=RocksDB;
+CREATE TABLE t6 (id6 INT) engine=RocksDB;
+INSERT INTO t1 VALUES(1,1);
+INSERT INTO t2 VALUES(1,1,1);
+INSERT INTO t3 VALUES(1,1);
+INSERT INTO t4 VALUES(1);
+INSERT INTO t5 VALUES(1,1);
+INSERT INTO t6 VALUES(1);
+SELECT * FROM
+t1
+NATURAL JOIN
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+id2 id1 id3 id5 id4 id3 id6 id5
+1 1 1 1 1 1 1 1
+SELECT * FROM
+t1
+NATURAL JOIN
+(((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6) on t3.id4 = t5.id5) JOIN t2
+ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5));
+id2 id1 id4 id3 id6 id5 id3 id5
+1 1 1 1 1 1 1 1
+SELECT * FROM t1 NATURAL JOIN ((t3 join (t5 NATURAL JOIN t6)) JOIN t2);
+id2 id1 id3 id4 id6 id5 id3 id5
+1 1 1 1 1 1 1 1
+SELECT * FROM
+(t2 JOIN (t3 NATURAL JOIN t4, t5 NATURAL JOIN t6)
+ON (t3.id3 = t2.id3 AND t5.id5 = t2.id5))
+NATURAL JOIN
+t1;
+id2 id3 id5 id4 id3 id6 id5 id1
+1 1 1 1 1 1 1 1
+SELECT * FROM
+(t2 JOIN ((t3 NATURAL JOIN t4) join (t5 NATURAL JOIN t6)))
+NATURAL JOIN
+t1;
+id2 id3 id5 id4 id3 id6 id5 id1
+1 1 1 1 1 1 1 1
+DROP TABLE t1,t2,t3,t4,t5,t6;
+CREATE TABLE t1 (a int, b int, PRIMARY KEY (a,b), KEY b (b)) engine=RocksDB;
+INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range PRIMARY,b PRIMARY 8 NULL 501 Using where; Using index for group-by
+SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a;
+MAX(b) a
+1 1
+SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a;
+MIN(b) a
+2 1
+CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)) engine=RocksDB;
+INSERT INTO t2 SELECT a,b,b FROM t1;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range PRIMARY PRIMARY 12 NULL 251 Using where; Using index for group-by
+SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a;
+MIN(c)
+2
+DROP TABLE t1,t2;
+CREATE TABLE t1 (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3), (1,4), (1,5),
+(2,2), (2,3), (2,1), (3,1), (4,1), (4,2), (4,3), (4,4), (4,5), (4,6);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by
+FLUSH STATUS;
+SELECT max(b), a FROM t1 GROUP BY a;
+max(b) a
+5 1
+3 2
+1 3
+6 4
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+EXPLAIN SELECT max(b), a FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by
+FLUSH STATUS;
+CREATE TABLE t2 engine=RocksDB SELECT max(b), a FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+FLUSH STATUS;
+SELECT * FROM (SELECT max(b), a FROM t1 GROUP BY a) b;
+max(b) a
+5 1
+3 2
+1 3
+6 4
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+FLUSH STATUS;
+(SELECT max(b), a FROM t1 GROUP BY a) UNION
+(SELECT max(b), a FROM t1 GROUP BY a);
+max(b) a
+5 1
+3 2
+1 3
+6 4
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 16
+Handler_read_next 0
+EXPLAIN (SELECT max(b), a FROM t1 GROUP BY a) UNION
+(SELECT max(b), a FROM t1 GROUP BY a);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 range a a 5 NULL 501 Using index for group-by
+2 UNION t1 range a a 5 NULL 501 Using index for group-by
+NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL Using temporary
+EXPLAIN SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+FROM t1 AS t1_outer;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using index
+2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE EXISTS
+(SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using index
+2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+(SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) > 12;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE
+2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE
+a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer index NULL a 10 NULL 1000 Using where; Using index
+2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING
+a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer range a a 5 NULL 501 Using index for group-by
+2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by
+EXPLAIN SELECT 1 FROM t1 AS t1_outer1 JOIN t1 AS t1_outer2
+ON t1_outer1.a = (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2)
+AND t1_outer1.b = t1_outer2.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer2 index NULL a 10 NULL 1000 Using where; Using index
+1 PRIMARY t1_outer1 ref a a 10 const,test.t1_outer2.b 1 Using where; Using index
+2 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by
+EXPLAIN SELECT (SELECT (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2) x
+FROM t1 AS t1_outer) x2 FROM t1 AS t1_outer2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1_outer2 index NULL a 10 NULL 1000 Using index
+2 SUBQUERY t1_outer index NULL a 10 NULL 1000 Using index
+3 SUBQUERY t1 range a a 5 NULL 501 Using index for group-by
+CREATE TABLE t3 LIKE t1;
+FLUSH STATUS;
+INSERT INTO t3 SELECT a,MAX(b) FROM t1 GROUP BY a;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+DELETE FROM t3;
+FLUSH STATUS;
+INSERT INTO t3 SELECT 1, (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2)
+FROM t1 LIMIT 1;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+FLUSH STATUS;
+DELETE FROM t3 WHERE (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) > 10000;
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 0
+FLUSH STATUS;
+DELETE FROM t3 WHERE (SELECT (SELECT MAX(b) FROM t1 GROUP BY a HAVING a < 2) x
+FROM t1) > 10000;
+ERROR 21000: Subquery returns more than 1 row
+SHOW STATUS LIKE 'handler_read__e%';
+Variable_name Value
+Handler_read_key 8
+Handler_read_next 1
+DROP TABLE t1,t2,t3;
+CREATE TABLE t1 (a int, INDEX idx(a)) engine=RocksDB;
+INSERT INTO t1 VALUES
+(4), (2), (1), (2), (4), (2), (1), (4),
+(4), (2), (1), (2), (2), (4), (1), (4);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT DISTINCT(a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx idx 5 NULL 1001 Using index for group-by
+SELECT DISTINCT(a) FROM t1;
+a
+1
+2
+4
+EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range idx idx 5 NULL 1001 Using index for group-by
+SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1;
+a
+1
+2
+4
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT) engine=RocksDB;
+INSERT INTO t1 (a, b) VALUES (1,1), (1,2), (1,3);
+INSERT INTO t1 SELECT a + 1, b FROM t1;
+INSERT INTO t1 SELECT a + 2, b FROM t1;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 Using temporary; Using filesort
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+a MIN(b) MAX(b)
+4 1 3
+3 1 3
+2 1 3
+1 1 3
+CREATE INDEX break_it ON t1 (a, b);
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range break_it break_it 10 NULL 501 Using index for group-by
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a;
+a MIN(b) MAX(b)
+1 1 3
+2 1 3
+3 1 3
+4 1 3
+EXPLAIN
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range break_it break_it 10 NULL 501 Using index for group-by; Using temporary; Using filesort
+SELECT a, MIN(b), MAX(b) FROM t1 GROUP BY a ORDER BY a DESC;
+a MIN(b) MAX(b)
+4 1 3
+3 1 3
+2 1 3
+1 1 3
+EXPLAIN
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index break_it break_it 10 NULL 1000 Using index
+SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
+a MIN(b) MAX(b) AVG(b)
+4 1 3 2.0000
+3 1 3 2.0000
+2 1 3 2.0000
+1 1 3 2.0000
+DROP TABLE t1;
+create table t1 (a int, b int, primary key (a,b), key `index` (a,b)) engine=MyISAM;
+insert into t1 (a,b) values
+(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6),
+(0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13),
+(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),
+(1,7),(1,8),(1,9),(1,10),(1,11),(1,12),(1,13),
+(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),
+(2,7),(2,8),(2,9),(2,10),(2,11),(2,12),(2,13),
+(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6),
+(3,7),(3,8),(3,9),(3,10),(3,11),(3,12),(3,13);
+insert into t1 (a,b) select a, max(b)+1 from t1 where a = 0 group by a;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select * from t1;
+a b
+0 0
+0 1
+0 2
+0 3
+0 4
+0 5
+0 6
+0 7
+0 8
+0 9
+0 10
+0 11
+0 12
+0 13
+0 14
+1 0
+1 1
+1 2
+1 3
+1 4
+1 5
+1 6
+1 7
+1 8
+1 9
+1 10
+1 11
+1 12
+1 13
+2 0
+2 1
+2 2
+2 3
+2 4
+2 5
+2 6
+2 7
+2 8
+2 9
+2 10
+2 11
+2 12
+2 13
+3 0
+3 1
+3 2
+3 3
+3 4
+3 5
+3 6
+3 7
+3 8
+3 9
+3 10
+3 11
+3 12
+3 13
+explain extended select sql_buffer_result a, max(b)+1 from t1 where a = 0 group by a;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range PRIMARY,index PRIMARY 4 NULL 1 100.00 Using where; Using index for group-by; Using temporary
+Warnings:
+Note 1003 /* select#1 */ select sql_buffer_result `test`.`t1`.`a` AS `a`,(max(`test`.`t1`.`b`) + 1) AS `max(b)+1` from `test`.`t1` where (`test`.`t1`.`a` = 0) group by `test`.`t1`.`a`
+drop table t1;
+CREATE TABLE t1 (a int, b int, c int, d int,
+KEY foo (c,d,a,b), KEY bar (c,a,b,d)) engine=RocksDB;
+INSERT INTO t1 VALUES (1, 1, 1, 1), (1, 1, 1, 2), (1, 1, 1, 3), (1, 1, 1, 4);
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT a,b,c+1,d FROM t1;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range foo,bar foo 10 NULL 126 Using where; Using index for group-by
+SELECT DISTINCT c FROM t1 WHERE d=4;
+c
+1
+2
+DROP TABLE t1;
+#
+# Bug #45386: Wrong query result with MIN function in field list,
+# WHERE and GROUP BY clause
+#
+CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+# test MIN
+#should use range with index for group by
+EXPLAIN
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range a a 10 NULL 501 Using where; Using index for group-by
+#should return 1 row
+SELECT a, MIN(b) FROM t WHERE b <> 0 GROUP BY a;
+a MIN(b)
+2 1
+# test MAX
+#should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range a a 10 NULL 501 Using where; Using index for group-by
+#should return 1 row
+SELECT a, MAX(b) FROM t WHERE b <> 1 GROUP BY a;
+a MAX(b)
+2 0
+# test 3 ranges and use the middle one
+INSERT INTO t SELECT a, 2 FROM t;
+#should use range with index for group by
+EXPLAIN
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t range a a 10 NULL 501 Using where; Using index for group-by
+#should return 1 row
+SELECT a, MAX(b) FROM t WHERE b > 0 AND b < 2 GROUP BY a;
+a MAX(b)
+2 1
+DROP TABLE t;
+#
+# Bug #48472: Loose index scan inappropriately chosen for some WHERE
+# conditions
+#
+CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+SELECT a, MAX(b) FROM t WHERE 0=b+0 GROUP BY a;
+a MAX(b)
+2 0
+DROP TABLE t;
+End of 5.0 tests
+#
+# Bug #46607: Assertion failed: (cond_type == Item::FUNC_ITEM) results in
+# server crash
+#
+CREATE TABLE t (a INT, b INT, INDEX (a,b)) engine=RocksDB;
+INSERT INTO t VALUES (2,0), (2,0), (2,1), (2,1);
+INSERT INTO t SELECT * FROM t;
+SELECT a, MAX(b) FROM t WHERE b GROUP BY a;
+a MAX(b)
+2 1
+DROP TABLE t;
+CREATE TABLE t1(a INT NOT NULL, b INT NOT NULL, KEY (b)) engine=RocksDB;
+INSERT INTO t1 VALUES(1,1),(2,1);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+SELECT 1 AS c, b FROM t1 WHERE b IN (1,2) GROUP BY c, b;
+c b
+1 1
+SELECT a FROM t1 WHERE b=1;
+a
+1
+2
+DROP TABLE t1;
+#
+# Bug#47762: Incorrect result from MIN() when WHERE tests NOT NULL column
+# for NULL
+#
+## Test for NULLs allowed
+CREATE TABLE t1 ( a INT, KEY (a) ) engine=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x No matching min/max row
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+x x x x x x x x x Using where; Using index
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x No matching min/max row
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+MIN( a )
+NULL
+INSERT INTO t1 VALUES (NULL), (NULL);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Select tables optimized away
+SELECT MIN( a ) FROM t1 WHERE a <=> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+x x x x x x x x x Using where; Using index
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Select tables optimized away
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+MIN( a )
+NULL
+DROP TABLE t1;
+## Test for NOT NULLs
+CREATE TABLE t1 ( a INT NOT NULL PRIMARY KEY) engine=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+#
+# NULL-safe operator test disabled for non-NULL indexed columns.
+#
+# See bugs
+#
+# - Bug#52173: Reading NULL value from non-NULL index gives
+# wrong result in embedded server
+#
+# - Bug#52174: Sometimes wrong plan when reading a MAX value from
+# non-NULL index
+#
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a = NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a <> NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a > NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a < NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND 10;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN NULL AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+SELECT MIN( a ) FROM t1 WHERE a BETWEEN 10 AND NULL;
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE noticed after reading const tables
+x x x x x x x x x Using where; Using index
+SELECT MIN( a ) FROM t1 WHERE a = (SELECT a FROM t1 WHERE a < 0);
+MIN( a )
+NULL
+EXPLAIN
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+id select_type table type possible_keys key key_len ref rows Extra
+x x x x x x x x x Impossible WHERE
+SELECT MIN( a ) FROM t1 WHERE a IS NULL;
+MIN( a )
+NULL
+DROP TABLE t1;
+#
+# Bug#53859: Valgrind: opt_sum_query(TABLE_LIST*, List<Item>&, Item*) at
+# opt_sum.cc:305
+#
+CREATE TABLE t1 ( a INT, KEY (a) ) engine=RocksDB;
+INSERT INTO t1 VALUES (1), (2), (3);
+SELECT MIN( a ) AS min_a
+FROM t1
+WHERE a > 1 AND a IS NULL
+ORDER BY min_a;
+min_a
+NULL
+DROP TABLE t1;
+End of 5.1 tests
+#
+# WL#3220 (Loose index scan for COUNT DISTINCT)
+#
+CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b)) engine=RocksDB;
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+CREATE TABLE t2 (a INT, b INT, c INT, d INT, e INT, f INT, KEY (a,b,c)) engine=RocksDB;
+INSERT INTO t2 VALUES (1,1,1,1,1,1), (1,2,1,1,1,1), (1,3,1,1,1,1),
+(1,4,1,1,1,1);
+INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2;
+INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning)
+SELECT COUNT(DISTINCT a,b) FROM t1;
+COUNT(DISTINCT a,b)
+16
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning)
+SELECT COUNT(DISTINCT b,a) FROM t1;
+COUNT(DISTINCT b,a)
+16
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index a a 10 NULL 1000 Using index
+SELECT COUNT(DISTINCT b) FROM t1;
+COUNT(DISTINCT b)
+8
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+COUNT(DISTINCT a)
+1
+1
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning)
+SELECT COUNT(DISTINCT b) FROM t1 GROUP BY a;
+COUNT(DISTINCT b)
+8
+8
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index a a 10 NULL 1000 Using index; Using filesort
+SELECT COUNT(DISTINCT a) FROM t1 GROUP BY b;
+COUNT(DISTINCT a)
+2
+2
+2
+2
+2
+2
+2
+2
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index a a 10 NULL 1000 Using index
+SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 1000 Using index
+SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+COUNT(DISTINCT a, b + 0)
+16
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 1000 Using index
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT b) < 10;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 NULL
+SELECT COUNT(DISTINCT a) FROM t1 HAVING COUNT(DISTINCT c) < 10;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by
+SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10;
+1
+1
+EXPLAIN SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 10 NULL 1001 Using index for group-by (scanning)
+SELECT 1 FROM t1 GROUP BY a HAVING COUNT(DISTINCT b) > 1;
+1
+1
+1
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1_1 index a a 10 NULL 1000 Using index; Using temporary; Using filesort
+1 SIMPLE t1_2 index NULL a 10 NULL 1000 Using index; Using join buffer (Block Nested Loop)
+SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+COUNT(DISTINCT t1_1.a)
+1
+1
+EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 5 NULL 501 Using index for group-by
+SELECT COUNT(DISTINCT a), 12 FROM t1;
+COUNT(DISTINCT a) 12
+2 12
+EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 15 NULL 1001 Using index for group-by (scanning)
+SELECT COUNT(DISTINCT a, b, c) FROM t2;
+COUNT(DISTINCT a, b, c)
+16
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 5 NULL 251 Using index for group-by
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2;
+COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT a)
+2 3 1.5000
+EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 NULL
+SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT f) FROM t2;
+COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT f)
+2 3 1.0000
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 10 NULL 501 Using index for group-by (scanning)
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2;
+COUNT(DISTINCT a, b) COUNT(DISTINCT b, a)
+16 16
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 NULL
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, f) FROM t2;
+COUNT(DISTINCT a, b) COUNT(DISTINCT b, f)
+16 8
+EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 NULL
+SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, d) FROM t2;
+COUNT(DISTINCT a, b) COUNT(DISTINCT b, d)
+16 8
+EXPLAIN SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 15 NULL 1001 Using index for group-by (scanning)
+SELECT a, c, COUNT(DISTINCT c, a, b) FROM t2 GROUP BY a, b, c;
+a c COUNT(DISTINCT c, a, b)
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+EXPLAIN SELECT COUNT(DISTINCT c, a, b) FROM t2
+WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 15 NULL 1001 Using where; Using index for group-by (scanning)
+SELECT COUNT(DISTINCT c, a, b) FROM t2
+WHERE a > 5 AND b BETWEEN 10 AND 20 GROUP BY a, b, c;
+COUNT(DISTINCT c, a, b)
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+GROUP BY b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref a a 5 const 1000 Using where; Using index
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 WHERE a = 5
+GROUP BY b;
+COUNT(DISTINCT b) SUM(DISTINCT b)
+EXPLAIN SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 10 NULL 501 Using index for group-by (scanning)
+SELECT a, COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+a COUNT(DISTINCT b) SUM(DISTINCT b)
+1 8 36
+2 8 36
+EXPLAIN SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 10 NULL 501 Using index for group-by (scanning)
+SELECT COUNT(DISTINCT b), SUM(DISTINCT b) FROM t2 GROUP BY a;
+COUNT(DISTINCT b) SUM(DISTINCT b)
+8 36
+8 36
+EXPLAIN SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL a NULL NULL NULL 1000 Using where
+SELECT COUNT(DISTINCT a, b) FROM t2 WHERE c = 13 AND d = 42;
+COUNT(DISTINCT a, b)
+0
+EXPLAIN SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+WHERE b = 13 AND c = 42 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 15 NULL 251 Using where; Using index for group-by
+SELECT a, COUNT(DISTINCT a), SUM(DISTINCT a) FROM t2
+WHERE b = 13 AND c = 42 GROUP BY a;
+a COUNT(DISTINCT a) SUM(DISTINCT a)
+# This query could have been resolved using loose index scan since
+# the second part of count(..) is defined by a constant predicate
+EXPLAIN SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index NULL a 15 NULL 1000 Using where; Using index
+SELECT COUNT(DISTINCT a, b), SUM(DISTINCT a) FROM t2 WHERE b = 42;
+COUNT(DISTINCT a, b) SUM(DISTINCT a)
+0 NULL
+EXPLAIN SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index a a 15 NULL 1000 Using index
+SELECT SUM(DISTINCT a), MAX(b) FROM t2 GROUP BY a;
+SUM(DISTINCT a) MAX(b)
+1 8
+2 8
+EXPLAIN SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 15 NULL 1001 Using index for group-by (scanning)
+SELECT 42 * (a + c + COUNT(DISTINCT c, a, b)) FROM t2 GROUP BY a, b, c;
+42 * (a + c + COUNT(DISTINCT c, a, b))
+126
+126
+126
+126
+126
+126
+126
+126
+168
+168
+168
+168
+168
+168
+168
+168
+EXPLAIN SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index a a 15 NULL 1000 Using index
+SELECT (SUM(DISTINCT a) + MAX(b)) FROM t2 GROUP BY a;
+(SUM(DISTINCT a) + MAX(b))
+9
+10
+DROP TABLE t1,t2;
+# end of WL#3220 tests
+#
+# Bug#50539: Wrong result when loose index scan is used for an aggregate
+# function with distinct
+#
+CREATE TABLE t1 (
+f1 int(11) NOT NULL DEFAULT '0',
+f2 char(1) NOT NULL DEFAULT '',
+PRIMARY KEY (f1,f2)
+) engine=RocksDB;
+insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'),
+(3, 'A'), (3, 'B'), (3, 'C'), (3, 'D');
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+f1 COUNT(DISTINCT f2)
+1 3
+2 1
+3 4
+explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index PRIMARY PRIMARY 5 NULL 1000 Using index
+drop table t1;
+# End of test#50539.
+#
+# Bug#17217128 - BAD INTERACTION BETWEEN MIN/MAX AND
+# "HAVING SUM(DISTINCT)": WRONG RESULTS.
+#
+CREATE TABLE t (a INT, b INT, KEY(a,b)) engine=RocksDB;
+INSERT INTO t VALUES (1,1), (2,2), (3,3), (4,4), (1,0), (3,2), (4,5);
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+set optimizer_trace_max_mem_size=1048576;
+set @@session.optimizer_trace='enabled=on';
+set end_markers_in_json=on;
+ANALYZE TABLE t;
+Table Op Msg_type Msg_text
+test.t analyze status OK
+SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+a SUM(DISTINCT a) MIN(b)
+1 1 0
+2 2 2
+3 3 2
+4 4 4
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b) FROM t GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index a a 10 NULL 1000 Using index
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+OK
+1
+SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+a SUM(DISTINCT a) MAX(b)
+1 1 1
+2 2 2
+3 3 3
+4 4 5
+EXPLAIN SELECT a, SUM(DISTINCT a), MAX(b) FROM t GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index a a 10 NULL 1000 Using index
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+OK
+1
+SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+a MAX(b)
+1 1
+2 2
+3 3
+4 5
+EXPLAIN SELECT a, MAX(b) FROM t GROUP BY a HAVING SUM(DISTINCT a);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index a a 10 NULL 1000 Using index
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+OK
+1
+SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+SUM(DISTINCT a) MIN(b) MAX(b)
+10 0 5
+EXPLAIN SELECT SUM(DISTINCT a), MIN(b), MAX(b) FROM t;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index a a 10 NULL 1000 Using index
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+OK
+1
+SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+a SUM(DISTINCT a) MIN(b) MAX(b)
+1 1 0 1
+2 2 2 2
+3 3 2 3
+4 4 4 5
+EXPLAIN SELECT a, SUM(DISTINCT a), MIN(b), MAX(b) FROM t GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t index a a 10 NULL 1000 Using index
+SELECT TRACE RLIKE 'have_both_agg_distinct_and_min_max' AS OK
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+OK
+1
+SET optimizer_trace_max_mem_size=DEFAULT;
+SET optimizer_trace=DEFAULT;
+SET end_markers_in_json=DEFAULT;
+DROP TABLE t;
+#
+# Bug#18109609: LOOSE INDEX SCAN IS NOT USED WHEN IT SHOULD
+#
+CREATE TABLE t1 (
+id INT AUTO_INCREMENT PRIMARY KEY,
+c1 INT,
+c2 INT,
+KEY(c1,c2)) engine=RocksDB;
+INSERT INTO t1(c1,c2) VALUES
+(1, 1), (1,2), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3),
+(4,4), (4,5), (4,6), (4,7), (4,8), (4,9), (4,10), (4,11), (4,12), (4,13),
+(4,14), (4,15), (4,16), (4,17), (4,18), (4,19), (4,20),(5,5);
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+EXPLAIN SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range c1 c1 5 NULL 251 Using where; Using index for group-by
+FLUSH STATUS;
+SELECT MAX(c2), c1 FROM t1 WHERE c1 = 4 GROUP BY c1;
+MAX(c2) c1
+20 4
+SHOW SESSION STATUS LIKE 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 3
+Handler_read_last 1
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 0
+DROP TABLE t1;
+# End of test for Bug#18109609
+set global debug="-d,force_group_by";
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result b/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result
new file mode 100644
index 00000000000..93c8a464577
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result
@@ -0,0 +1,10 @@
+CREATE TABLE t1 (a INT, b CHAR(8), KEY ab(a, b)) ENGINE=rocksdb DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_bin;
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+SET debug_dbug="+d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+SELECT b FROM t1 FORCE INDEX(ab) WHERE a=35;
+b
+foo
+SET debug_dbug="-d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result b/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result
index 3ae0769338f..6d4139caefa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result
@@ -21,9 +21,100 @@ a b
5 e
6 f
DROP TABLE t1;
-#----------------------------------------
-# UNIQUE KEYS are not supported currently
-#-----------------------------------------
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+ERROR 23000: Duplicate entry '1' for key 'a'
+INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
+ERROR 23000: Duplicate entry '3' for key 'a'
+INSERT INTO t1 (a,b) VALUES (0,'');
+SELECT a,b FROM t1;
+a b
+0
+1 a
+100 a
+2 b
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+Warnings:
+Warning 1062 Duplicate entry '1' for key 'a'
+INSERT INTO t1 (a,b) VALUES (3,'a'),(4,'d') ON DUPLICATE KEY UPDATE a = a+10;
+SELECT a,b FROM t1;
+a b
+0
+1 a
+100 a
+12345 z
+13 c
+14 d
+2 b
+29 n
+30 m
+5 e
+6 f
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
+INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
+INSERT INTO t1 (a,b) VALUES (30,'m'),(29,'n');
+INSERT INTO t1 (a,b) VALUES (100,'b'), (2,'c');
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+ERROR 23000: Duplicate entry '1-a' for key 'a'
+SELECT a,b FROM t1;
+a b
+1 a
+100 a
+100 b
+2 b
+2 c
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+INSERT IGNORE INTO t1 (a,b) VALUES (1,'a'),(12345,'z');
+Warnings:
+Warning 1062 Duplicate entry '1-a' for key 'a'
+INSERT INTO t1 (a,b) VALUES (1,'a'),(12345,'z') ON DUPLICATE KEY UPDATE a = a+VALUES(a);
+SELECT a,b FROM t1;
+a b
+100 a
+100 b
+2 a
+2 b
+2 c
+24690 z
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+INSERT INTO t1 (a,b) VALUES (101,'x'),(101,'x');
+ERROR 23000: Duplicate entry '101-x' for key 'a'
+SELECT a,b FROM t1;
+a b
+100 a
+100 b
+2 a
+2 b
+2 c
+24690 z
+29 n
+3 c
+30 m
+4 d
+5 e
+6 f
+DROP TABLE t1;
CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
@@ -63,3 +154,109 @@ a b
5 e
6 f
DROP TABLE t1;
+
+INSERT on DUPLICATE KEY UPDATE with multiple keys
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+SELECT a,b FROM t1;
+a b
+1 aaaaaaaaaaaaaaaaaa
+10 aa
+2 aaaaaaa
+3 aa
+4 aa
+5 aa
+6 aa
+7 aa
+8 aa
+9 aa
+DROP TABLE t1;
+
+INSERT on DUPLICATE KEY UPDATE with secondary key
+
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+SELECT a,b,c,pk FROM t1;
+a b c pk
+1 a 22 1
+10 j 1 11
+2 b 6 3
+2 c 4 14
+3 c 1 4
+4 d 1 5
+5 e 1 6
+6 f 1 7
+7 g 1 8
+8 h 1 9
+9 i 1 10
+DROP TABLE t1;
+
+Disable caching and see if it still functions properly
+
+SELECT @@rocksdb_enable_insert_with_update_caching;
+@@rocksdb_enable_insert_with_update_caching
+1
+SET GLOBAL rocksdb_enable_insert_with_update_caching=0;
+SELECT @@rocksdb_enable_insert_with_update_caching;
+@@rocksdb_enable_insert_with_update_caching
+0
+
+INSERT on DUPLICATE KEY UPDATE with multiple keys
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+SELECT a,b FROM t1;
+a b
+1 aaaaaaaaaaaaaaaaaa
+10 aa
+2 aaaaaaa
+3 aa
+4 aa
+5 aa
+6 aa
+7 aa
+8 aa
+9 aa
+DROP TABLE t1;
+
+INSERT on DUPLICATE KEY UPDATE with secondary key
+
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+SELECT a,b,c,pk FROM t1;
+a b c pk
+1 a 22 1
+10 j 1 11
+2 b 6 3
+2 c 4 14
+3 c 1 4
+4 d 1 5
+5 e 1 6
+6 f 1 7
+7 g 1 8
+8 h 1 9
+9 i 1 10
+DROP TABLE t1;
+
+Cleanup
+
+SET GLOBAL rocksdb_enable_insert_with_update_caching=1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue884.result b/storage/rocksdb/mysql-test/rocksdb/r/issue884.result
new file mode 100644
index 00000000000..acfaca96d68
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue884.result
@@ -0,0 +1,79 @@
+create table test (
+a bigint(20) not null,
+b bigint(20) not null,
+c varchar(500) not null,
+d bigint(20) not null,
+e bigint(20) not null,
+f varchar(500) not null,
+g varchar(500) not null,
+h varchar(500) not null,
+i varchar(1000) not null,
+j varchar(16384) not null,
+k varchar(200) not null,
+l varchar(500) not null,
+m varchar(100) not null,
+n bigint(20) not null,
+primary key (a, b, m, c(100), l(100), d, e, f(100), g(100), h(100), n),
+key n (n),
+key d (d, a)
+) engine = rocksdb default charset = latin1;
+Table Op Msg_type Msg_text
+test.test analyze status OK
+explain
+select * from test where d = 10 and a = 10 and b = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE test index_merge PRIMARY,d d,PRIMARY 24,16 NULL # Using intersect(d,PRIMARY); Using where
+select * from test where d = 10 and a = 10 and b = 2;
+a b c d e f g h i j k l m n
+10 2 i 10 950 f g h i j k l m 950
+10 2 i 10 951 f g h i j k l m 951
+10 2 i 10 952 f g h i j k l m 952
+10 2 i 10 953 f g h i j k l m 953
+10 2 i 10 954 f g h i j k l m 954
+10 2 i 10 955 f g h i j k l m 955
+10 2 i 10 956 f g h i j k l m 956
+10 2 i 10 957 f g h i j k l m 957
+10 2 i 10 958 f g h i j k l m 958
+10 2 i 10 959 f g h i j k l m 959
+10 2 i 10 960 f g h i j k l m 960
+10 2 i 10 961 f g h i j k l m 961
+10 2 i 10 962 f g h i j k l m 962
+10 2 i 10 963 f g h i j k l m 963
+10 2 i 10 964 f g h i j k l m 964
+10 2 i 10 965 f g h i j k l m 965
+10 2 i 10 966 f g h i j k l m 966
+10 2 i 10 967 f g h i j k l m 967
+10 2 i 10 968 f g h i j k l m 968
+10 2 i 10 969 f g h i j k l m 969
+10 2 i 10 970 f g h i j k l m 970
+10 2 i 10 971 f g h i j k l m 971
+10 2 i 10 972 f g h i j k l m 972
+10 2 i 10 973 f g h i j k l m 973
+10 2 i 10 974 f g h i j k l m 974
+10 2 i 10 975 f g h i j k l m 975
+10 2 i 10 976 f g h i j k l m 976
+10 2 i 10 977 f g h i j k l m 977
+10 2 i 10 978 f g h i j k l m 978
+10 2 i 10 979 f g h i j k l m 979
+10 2 i 10 980 f g h i j k l m 980
+10 2 i 10 981 f g h i j k l m 981
+10 2 i 10 982 f g h i j k l m 982
+10 2 i 10 983 f g h i j k l m 983
+10 2 i 10 984 f g h i j k l m 984
+10 2 i 10 985 f g h i j k l m 985
+10 2 i 10 986 f g h i j k l m 986
+10 2 i 10 987 f g h i j k l m 987
+10 2 i 10 988 f g h i j k l m 988
+10 2 i 10 989 f g h i j k l m 989
+10 2 i 10 990 f g h i j k l m 990
+10 2 i 10 991 f g h i j k l m 991
+10 2 i 10 992 f g h i j k l m 992
+10 2 i 10 993 f g h i j k l m 993
+10 2 i 10 994 f g h i j k l m 994
+10 2 i 10 995 f g h i j k l m 995
+10 2 i 10 996 f g h i j k l m 996
+10 2 i 10 997 f g h i j k l m 997
+10 2 i 10 998 f g h i j k l m 998
+10 2 i 10 999 f g h i j k l m 999
+10 2 i 10 1000 f g h i j k l m 1000
+drop table test;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue896.result b/storage/rocksdb/mysql-test/rocksdb/r/issue896.result
new file mode 100644
index 00000000000..917c95733f7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue896.result
@@ -0,0 +1,17 @@
+CREATE TABLE `t1` (
+`a` bigint(20) NOT NULL,
+`b` varchar(10) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
+`u` bigint(20) unsigned NOT NULL,
+`d` bigint(20) DEFAULT NULL,
+PRIMARY KEY (`a`,`b`),
+KEY `d` (`d`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='ttl_duration=1000;ttl_col=u';
+INSERT INTO t1 VALUES (100, 'aaabbb', UNIX_TIMESTAMP(), 200);
+EXPLAIN SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL d 11 NULL # Using index
+# segfault here without the fix
+SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+COUNT(*)
+1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue900.result b/storage/rocksdb/mysql-test/rocksdb/r/issue900.result
new file mode 100644
index 00000000000..062d0da0864
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue900.result
@@ -0,0 +1,11 @@
+CREATE TABLE t1(c1 VARCHAR(1) CHARACTER SET 'utf8' COLLATE 'utf8_bin', c2 YEAR, c3 REAL(1,0) UNSIGNED, PRIMARY KEY(c1)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES(0,'0','0');
+INSERT INTO t1 VALUES('{0}','0','0');
+Warnings:
+Warning 1265 Data truncated for column 'c1' at row 1
+INSERT INTO t1 VALUES('1','0','1');
+ALTER TABLE t1 ADD INDEX(c3), ADD UNIQUE (c3);
+ERROR 23000: Duplicate entry '0' for key 'c3_2'
+SELECT c3 FROM t1 FORCE INDEX(c3) ORDER BY c3;
+ERROR 42000: Key 'c3' doesn't exist in table 't1'
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result b/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result
new file mode 100644
index 00000000000..600f19e0d61
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result
@@ -0,0 +1,15 @@
+create table t (i int primary key) engine=rocksdb;
+drop table t;
+create table t (i int primary key, j int, key(j) comment 'rev:bf5_2') engine=rocksdb;
+select RIGHT(HEX(index_number), 2) from information_schema.rocksdb_ddl where table_name = 't';
+RIGHT(HEX(index_number), 2)
+FE
+FF
+insert into t values (1, 1);
+select j from t order by j asc;
+j
+1
+select j from t order by j desc;
+j
+1
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result
new file mode 100644
index 00000000000..ff4625698ca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result
@@ -0,0 +1,128 @@
+reset master;
+set GLOBAL binlog_format= 'ROW';
+SET GLOBAL enable_blind_replace=ON;
+set binlog_format=row;
+create table t5 (c1 int primary key, c2 int);
+insert into t5 values (1, 1);
+insert into t5 values (2, 2);
+insert into t5 values (3, 3);
+select * from t5;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t5 values (1, 11);
+replace into t5 values (2, 22);
+replace into t5 values (3, 33);
+select case when variable_value-@c = 3 then 'true' else 'false' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t5;
+c1 c2
+1 11
+2 22
+3 33
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int)
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+flush logs;
+drop table t5;
+reset master;
+Replaying binlog events containing blind replace statements should work
+select * from t5;
+c1 c2
+1 11
+2 22
+3 33
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int)
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+drop table t5;
+reset master;
+Replaying the same binlog events with blind replace disabled should work
+The server should internally convert such events into updates
+SET GLOBAL enable_blind_replace=OFF;
+select * from t5;
+c1 c2
+1 11
+2 22
+3 33
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t5 (c1 int primary key, c2 int)
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t5)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+set GLOBAL binlog_format=DEFAULT;
+SET GLOBAL enable_blind_replace=DEFAULT;
+drop table t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result
index 5ac36e1f4ba..ac6615be093 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result
@@ -2,7 +2,7 @@ drop table if exists r1;
connect con1,localhost,root,,;
connect con2,localhost,root,,;
connection con1;
-create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4)) engine=rocksdb;
+create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4), KEY (value1, value2, value3)) engine=rocksdb;
insert into r1 values (1,1,1,1,1,1,1,1);
insert into r1 values (1,1,1,2,2,2,2,2);
insert into r1 values (1,1,2,1,3,3,3,3);
@@ -41,7 +41,7 @@ update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1';
/*!50601 SET @enable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=1', 'SET @dummy = 0') */;
/*!50601 PREPARE s FROM @enable_bulk_load */;
/*!50601 EXECUTE s */;
--- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000002', MASTER_LOG_POS=4832;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
-- SET GLOBAL gtid_slave_pos='0-1-18';
DROP TABLE IF EXISTS `r1`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
@@ -55,7 +55,8 @@ CREATE TABLE `r1` (
`value2` int(11) DEFAULT NULL,
`value3` int(11) DEFAULT NULL,
`value4` int(11) DEFAULT NULL,
- PRIMARY KEY (`id1`,`id2`,`id3`,`id4`)
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
/* ORDERING KEY (DESC) : PRIMARY */;
@@ -78,9 +79,70 @@ UNLOCK TABLES;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+/*!50601 SELECT count(*) INTO @is_mysql8 FROM information_schema.TABLES WHERE table_schema='performance_schema' AND table_name='session_variables' */;
+/*!50601 SET @check_rocksdb = CONCAT( 'SELECT count(*) INTO @is_rocksdb_supported FROM ', IF (@is_mysql8, 'performance', 'information'), '_schema.session_variables WHERE variable_name=\'rocksdb_bulk_load\'') */;
+/*!50601 PREPARE s FROM @check_rocksdb */;
+/*!50601 EXECUTE s */;
+/*!50601 SET @bulk_load_allow_sk = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load_allow_sk=1', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @bulk_load_allow_sk */;
+/*!50601 EXECUTE s */;
+/*!50601 SET @enable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=1', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @enable_bulk_load */;
+/*!50601 EXECUTE s */;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
+DROP TABLE IF EXISTS `r1`;
+/*!40101 SET @saved_cs_client = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `r1` (
+ `id1` int(11) NOT NULL DEFAULT '0',
+ `id2` int(11) NOT NULL DEFAULT '0',
+ `id3` varchar(100) NOT NULL DEFAULT '',
+ `id4` int(11) NOT NULL DEFAULT '0',
+ `value1` int(11) DEFAULT NULL,
+ `value2` int(11) DEFAULT NULL,
+ `value3` int(11) DEFAULT NULL,
+ `value4` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+/* ORDERING KEY (DESC) : PRIMARY */;
+
+LOCK TABLES `r1` WRITE;
+/*!40000 ALTER TABLE `r1` DISABLE KEYS */;
+INSERT INTO `r1` VALUES (2,2,'2',2,16,16,16,16),(2,2,'2',1,15,15,15,15),(2,2,'1',2,14,14,14,14),(2,2,'1',1,13,13,13,13),(2,1,'2',2,12,12,12,12),(2,1,'2',1,11,11,11,11),(2,1,'1',2,10,10,10,10),(2,1,'1',1,9,9,9,9),(1,2,'2',2,8,8,8,8),(1,2,'2',1,7,7,7,7),(1,2,'1',2,6,6,6,6),(1,2,'1',1,5,5,5,5),(1,1,'2',2,4,4,4,4),(1,1,'2',1,3,3,3,3),(1,1,'1',2,2,2,2,2),(1,1,'1',1,1,1,1,1);
+/*!40000 ALTER TABLE `r1` ENABLE KEYS */;
+UNLOCK TABLES;
+/*!50601 SET @disable_bulk_load = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load=0', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @disable_bulk_load */;
+/*!50601 EXECUTE s */;
+/*!50601 SET @disable_bulk_load_allow_sk = IF (@is_rocksdb_supported, 'SET SESSION rocksdb_bulk_load_allow_sk=0', 'SET @dummy = 0') */;
+/*!50601 PREPARE s FROM @disable_bulk_load_allow_sk */;
+/*!50601 EXECUTE s */;
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
rollback;
connection con1;
-1
+2
set @save_default_storage_engine=@@global.default_storage_engine;
SET GLOBAL default_storage_engine=rocksdb;
@@ -94,7 +156,7 @@ SET GLOBAL default_storage_engine=rocksdb;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
--- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000002', MASTER_LOG_POS=4832;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
-- SET GLOBAL gtid_slave_pos='0-1-18';
DROP TABLE IF EXISTS `r1`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
@@ -108,7 +170,8 @@ CREATE TABLE `r1` (
`value2` int(11) DEFAULT NULL,
`value3` int(11) DEFAULT NULL,
`value4` int(11) DEFAULT NULL,
- PRIMARY KEY (`id1`,`id2`,`id3`,`id4`)
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
/* ORDERING KEY : (null) */;
@@ -128,7 +191,7 @@ UNLOCK TABLES;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-2
+3
==== mysqldump with --innodb-stats-on-metadata ====
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
@@ -141,7 +204,7 @@ UNLOCK TABLES;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
--- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000002', MASTER_LOG_POS=4832;
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=BINLOG_START;
-- SET GLOBAL gtid_slave_pos='0-1-18';
DROP TABLE IF EXISTS `r1`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
@@ -155,7 +218,8 @@ CREATE TABLE `r1` (
`value2` int(11) DEFAULT NULL,
`value3` int(11) DEFAULT NULL,
`value4` int(11) DEFAULT NULL,
- PRIMARY KEY (`id1`,`id2`,`id3`,`id4`)
+ PRIMARY KEY (`id1`,`id2`,`id3`,`id4`),
+ KEY `value1` (`value1`,`value2`,`value3`)
) ENGINE=ROCKSDB DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
/* ORDERING KEY : (null) */;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result
new file mode 100644
index 00000000000..12223ebf228
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result
@@ -0,0 +1,98 @@
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+drop table t1;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+create trigger trg before insert on t1 for each row set @a:=1;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+create table t1(c1 int,c2 int) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+drop table t1;
+create table t1(c1 int,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+create table t1(c1 int primary key,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+create table t1(c1 int primary key,c2 int, key idx1(c2)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+SET GLOBAL enable_blind_replace=OFF;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+drop table t1;
+SET GLOBAL enable_blind_replace=DEFAULT;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result
new file mode 100644
index 00000000000..65ee9768339
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result
@@ -0,0 +1,46 @@
+SET GLOBAL enable_blind_replace=ON;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+replace into t1 values(1,11);
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+update t1 set c2=22 where c1=1;
+commit;
+# Reap update.
+commit;
+select * from t1;
+c1 c2
+1 22
+2 2
+3 3
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+update t1 set c2=55 where c1=1;
+SELECT @@global.enable_blind_replace;
+@@global.enable_blind_replace
+1
+begin;
+replace into t1 values(1,66);
+commit;
+# Reap replace into.
+commit;
+select * from t1;
+c1 c2
+1 66
+2 2
+3 3
+drop table t1;
+SET GLOBAL enable_blind_replace=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result
index c0903eda663..070169fd674 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result
@@ -33,6 +33,13 @@ CF_NAME OPTION_TYPE VALUE
__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.26
default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+SET @@global.rocksdb_update_cf_options = 'cf2={prefix_extractor=capped:28};';
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.26
+cf2 PREFIX_EXTRACTOR rocksdb.CappedPrefix.28
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
COUNT(*)
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
index de4ed87865e..6cc4cc7a1dc 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -884,11 +884,14 @@ rocksdb_bulk_load_allow_sk OFF
rocksdb_bulk_load_allow_unsorted OFF
rocksdb_bulk_load_size 1000
rocksdb_bytes_per_sync 0
+rocksdb_cache_dump ON
+rocksdb_cache_high_pri_pool_ratio 0.000000
rocksdb_cache_index_and_filter_blocks ON
+rocksdb_cache_index_and_filter_with_high_priority ON
rocksdb_checksums_pct 100
rocksdb_collect_sst_properties ON
rocksdb_commit_in_the_middle OFF
-rocksdb_commit_time_batch_for_recovery OFF
+rocksdb_commit_time_batch_for_recovery ON
rocksdb_compact_cf
rocksdb_compaction_readahead_size 0
rocksdb_compaction_sequential_deletes 0
@@ -910,9 +913,11 @@ rocksdb_debug_ttl_rec_ts 0
rocksdb_debug_ttl_snapshot_ts 0
rocksdb_default_cf_options
rocksdb_delayed_write_rate 0
+rocksdb_delete_cf
rocksdb_delete_obsolete_files_period_micros 21600000000
rocksdb_enable_2pc ON
rocksdb_enable_bulk_load_api ON
+rocksdb_enable_insert_with_update_caching ON
rocksdb_enable_thread_tracking ON
rocksdb_enable_ttl ON
rocksdb_enable_ttl_read_filtering ON
@@ -963,10 +968,10 @@ rocksdb_persistent_cache_size_mb 0
rocksdb_pin_l0_filter_and_index_blocks_in_cache ON
rocksdb_print_snapshot_conflict_queries OFF
rocksdb_rate_limiter_bytes_per_sec 0
-rocksdb_read_free_rpl_tables
rocksdb_records_in_range 50
rocksdb_remove_mariabackup_checkpoint OFF
rocksdb_reset_stats OFF
+rocksdb_rollback_on_timeout OFF
rocksdb_seconds_between_stat_computes 3600
rocksdb_signal_drop_index_thread OFF
rocksdb_sim_cache_size 0
@@ -975,6 +980,7 @@ rocksdb_skip_fill_cache OFF
rocksdb_skip_unique_check_tables .*
rocksdb_sst_mgr_rate_bytes_per_sec 0
rocksdb_stats_dump_period_sec 600
+rocksdb_stats_level 0
rocksdb_stats_recalc_rate 0
rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
@@ -1359,7 +1365,7 @@ insert into t1 select (@a:=@a+1), 1234 from information_schema.session_variables
set @tmp1= @@rocksdb_max_row_locks;
set rocksdb_max_row_locks= 20;
update t1 set a=a+10;
-ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to max_num_locks limit' from ROCKSDB
+ERROR HY000: Got error 10 'Operation aborted: Failed to acquire lock due to rocksdb_max_row_locks limit' from ROCKSDB
DROP TABLE t1;
#
# Test AUTO_INCREMENT behavior problem,
@@ -1462,8 +1468,9 @@ set autocommit=1;
drop table t0, t1;
#
# Check status variables
+# NOTE: We exclude rocksdb_num_get_for_update_calls because it's a debug only status var
#
-show status like 'rocksdb%';
+show status where variable_name like 'rocksdb%' and variable_name not like '%num_get_for_update%';
Variable_name Value
Rocksdb_rows_deleted #
Rocksdb_rows_inserted #
@@ -1574,7 +1581,7 @@ Rocksdb_write_other #
Rocksdb_write_self #
Rocksdb_write_timedout #
Rocksdb_write_wal #
-select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%';
+select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
VARIABLE_NAME
ROCKSDB_ROWS_DELETED
ROCKSDB_ROWS_INSERTED
@@ -1687,7 +1694,7 @@ ROCKSDB_WRITE_TIMEDOUT
ROCKSDB_WRITE_WAL
# RocksDB-SE's status variables are global internally
# but they are shown as both session and global, like InnoDB's status vars.
-select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%';
+select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
VARIABLE_NAME
ROCKSDB_ROWS_DELETED
ROCKSDB_ROWS_INSERTED
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result
index 505487c08ba..aae6de12f6a 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result
@@ -110,7 +110,7 @@ set session debug_dbug= "-d,myrocks_simulate_bad_key_checksum1";
explain
select a from t3 force index(a) where a<4;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t3 index a a 5 NULL # Using where; Using index
+1 SIMPLE t3 range a a 5 NULL # Using where; Using index
select a from t3 force index(a) where a<4;
a
1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result
index ea9114c14d1..9106e79f80c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result
@@ -1,10 +1,12 @@
connect con, localhost, root,,;
connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET debug_sync='RESET';
-DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
-INSERT INTO t1 VALUES(1,1), (2,2), (3,3);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
connection default;
@@ -15,17 +17,430 @@ connection con;
pk a
2 2
3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a));
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--SK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+3
+4
+5
+--SK middle row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--SK end row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with PRIMARY
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+1 5 100
+--End row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with sk
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+1 5 100
+--End row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 4 100
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
+5 5
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='RESET';
+CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a));
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--SK first row delete
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+3
+4
+5
+--SK middle row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+4
+5
+--SK end row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+a
+2
+4
connection default;
disconnect con;
set debug_sync='RESET';
drop table t1;
connect con, localhost, root,,;
connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET debug_sync='RESET';
-DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT);
-INSERT INTO t1 VALUES(1,1), (2,2), (3,3);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
connection default;
@@ -34,19 +449,33 @@ DELETE FROM t1 WHERE pk = 3;
SET debug_sync='now SIGNAL go';
connection con;
pk a
+4 4
2 2
1 1
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+2 2
connection default;
disconnect con;
set debug_sync='RESET';
drop table t1;
connect con, localhost, root,,;
connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET debug_sync='RESET';
-DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
-INSERT INTO t1 VALUES(1,1), (2,2), (3,3);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
connection default;
@@ -57,17 +486,57 @@ connection con;
pk a
2 2
3 3
+4 4
+5 5
+--PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
+5 5
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk ASC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+2 2
+4 4
connection default;
disconnect con;
set debug_sync='RESET';
drop table t1;
connect con, localhost, root,,;
connection default;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET debug_sync='RESET';
-DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT);
-INSERT INTO t1 VALUES(1,1), (2,2), (3,3);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+--PK first row delete
connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+3 3
+2 2
+1 1
+--PK middle row delete
SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
connection default;
@@ -76,8 +545,126 @@ DELETE FROM t1 WHERE pk = 3;
SET debug_sync='now SIGNAL go';
connection con;
pk a
+4 4
2 2
1 1
+--PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+SELECT * FROM t1 order by t1.pk DESC FOR UPDATE;
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+DELETE FROM t1 WHERE pk = 1;
+SET debug_sync='now SIGNAL go';
+connection con;
+pk a
+4 4
+2 2
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with PRIMARY
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 200
+1 4 200
+1 5 200
+--End row delete with PRIMARY
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (PRIMARY) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 300
+1 4 300
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+connect con, localhost, root,,;
+connection default;
+set debug_sync='RESET';
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+--First row delete with sk
+connection con;
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=100 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 100
+1 3 100
+1 4 100
+1 5 100
+--Middle row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=200 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 200
+1 4 200
+1 5 200
+--End row delete with sk
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+update t1 force index (sk) set value=300 where id1=1;
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+connection con;
+select * from t1 where id1=1;
+id1 id2 value
+1 2 300
+1 4 300
connection default;
disconnect con;
set debug_sync='RESET';
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result
new file mode 100644
index 00000000000..fabf077e27a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result
@@ -0,0 +1,335 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists t1;
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3), (4,4);
+include/sync_slave_sql_with_master.inc
+
+# regular update/delete. With rocks_read_free_rpl=PK_SK, rocksdb_rows_read does not increase on slaves
+
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set value=value+1 where id=1;
+delete from t1 where id=4;
+select * from t1;
+id value
+1 2
+2 2
+3 3
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+id value
+1 2
+2 2
+3 3
+
+# "rocks_read_free_rpl=PK_SK" makes "row not found error" not happen anymore
+
+include/stop_slave.inc
+delete from t1 where id in (2, 3);
+include/start_slave.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set value=value+1 where id=3;
+delete from t1 where id=2;
+select * from t1;
+id value
+1 2
+3 4
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+id value
+1 2
+3 4
+
+## tables without primary key -- read free replication should be disabled
+
+
+#no index
+
+drop table t1;
+create table t1 (c1 int, c2 int);
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+include/sync_slave_sql_with_master.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t1;
+c1 c2
+3 100
+4 4
+5 5
+
+#secondary index only
+
+drop table t1;
+create table t1 (c1 int, c2 int, index i(c1));
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+include/sync_slave_sql_with_master.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t1;
+c1 c2
+3 100
+4 4
+5 5
+
+## large row operations -- primary key modification, secondary key modification
+
+drop table t1;
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+include/sync_slave_sql_with_master.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+#updating all secondary keys by 1
+
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+#updating all primary keys by 2
+
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+#updating secondary keys after truncating t1 on slave
+
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set c2=c2+10;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+#updating primary keys after truncating t1 on slave
+
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+update t1 set id2=id2+10;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+#deleting half rows
+
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+delete from t1 where id1 <= 5000;
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+include/diff_tables.inc [master:t1, slave:t1]
+
+# rocksdb_read_free_rpl = PK_ONLY i.e. it only works on tables with only PK
+
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+start slave;
+[on master]
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+[on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+[on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+include/wait_for_slave_sql_error.inc [errno=1032]
+select count(*) from t2 force index(primary);
+count(*)
+2
+select * from t2 where id=1;
+id i1 i2 value
+1 1 100 100
+select i1 from t2 where i1=1;
+i1
+1
+select i2 from t2 where i2=100;
+i2
+100
+select count(*) from u2 force index(primary);
+count(*)
+1
+select count(*) from u2 force index(i1);
+count(*)
+1
+select count(*) from u2 force index(i2);
+count(*)
+1
+select * from u2 where id=1;
+id i1 i2 value
+select i1 from u2 where i1=1;
+i1
+select i2 from u2 where i2=100;
+i2
+include/wait_for_slave_sql_to_start.inc
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_SK;
+start slave;
+
+# some tables with read-free replication on and some with it off
+
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = "t.*";
+start slave;
+[on master]
+drop table if exists t2;
+drop table if exists u2;
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int);
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+[on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+[on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+include/wait_for_slave_sql_error.inc [errno=1032]
+select count(*) from t2 force index(primary);
+count(*)
+2
+select * from t2 where id=1;
+id i1 i2 value
+1 1 100 100
+select i1 from t2 where i1=1;
+i1
+1
+select i2 from t2 where i2=100;
+i2
+100
+select count(*) from u2 force index(primary);
+count(*)
+1
+select * from u2 where id=1;
+id i1 i2 value
+select i1 from u2 where i1=1;
+i1
+select i2 from u2 where i2=100;
+i2
+include/wait_for_slave_sql_to_start.inc
+[on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = ".*";
+start slave;
+
+# secondary keys lose rows
+
+[on master]
+create table t3 (id int primary key, i1 int, i2 int, value int, index(i1),
+index(i2));
+insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+delete from t3 where id <= 2;
+[on master]
+update t3 set i2=100, value=100 where id=1;
+include/sync_slave_sql_with_master.inc
+select count(*) from t3 force index(primary);
+count(*)
+2
+select count(*) from t3 force index(i1);
+count(*)
+1
+select count(*) from t3 force index(i2);
+count(*)
+2
+select * from t3 where id=1;
+id i1 i2 value
+1 1 100 100
+select i1 from t3 where i1=1;
+i1
+select i2 from t3 where i2=100;
+i2
+100
+
+# secondary keys have extra rows
+
+[on master]
+create table t4 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t4 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+include/sync_slave_sql_with_master.inc
+[on slave]
+update t4 set i1=100 where id=1;
+[on master]
+delete from t4 where id=1;
+include/sync_slave_sql_with_master.inc
+[on slave]
+select count(*) from t4 force index(primary);
+count(*)
+2
+select count(*) from t4 force index(i1);
+count(*)
+3
+select count(*) from t4 force index(i2);
+count(*)
+2
+select i1 from t4 where i1=100;
+i1
+100
+
+# inserts are also read-free
+
+[on master]
+drop table if exists t2;
+drop table if exists t3;
+create table t2 (id int primary key, i1 int, i2 int);
+create table t3 (id int primary key, i1 int, i2 int, key(i1));
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+insert into t2 values(1, 1, 1);
+insert into t2 values(2, 2, 2);
+insert into t3 values(1, 1, 1);
+insert into t3 values(2, 2, 2);
+include/sync_slave_sql_with_master.inc
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t2;
+id i1 i2
+1 1 1
+2 2 2
+select * from t3;
+id i1 i2
+1 1 1
+2 2 2
+drop table t1, t2, t3, t4, u2;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result
new file mode 100644
index 00000000000..9e3c7a0582b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result
@@ -0,0 +1,35 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+include/stop_slave.inc
+set @@global.rocksdb_read_free_rpl = PK_SK;
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+include/diff_tables.inc [master:t1, slave:t1]
+include/diff_tables.inc [master:t2, slave:t2]
+include/diff_tables.inc [master:t3, slave:t3]
+include/diff_tables.inc [master:t4, slave:t4]
+include/diff_tables.inc [master:t5, slave:t5]
+include/diff_tables.inc [master:t6, slave:t6]
+include/diff_tables.inc [master:t7, slave:t7]
+include/diff_tables.inc [master:t8, slave:t8]
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+include/diff_tables.inc [master:t1, slave:t1]
+include/diff_tables.inc [master:t2, slave:t2]
+include/diff_tables.inc [master:t3, slave:t3]
+include/diff_tables.inc [master:t4, slave:t4]
+include/diff_tables.inc [master:t5, slave:t5]
+include/diff_tables.inc [master:t6, slave:t6]
+include/diff_tables.inc [master:t7, slave:t7]
+include/diff_tables.inc [master:t8, slave:t8]
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.rocksdb_read_free_rpl = default;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result
new file mode 100644
index 00000000000..adf05d06aac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result
@@ -0,0 +1,84 @@
+drop table if exists t1;
+SET @@global.rocksdb_rollback_on_timeout = 1;
+show variables like 'rocksdb_rollback_on_timeout';
+Variable_name Value
+rocksdb_rollback_on_timeout ON
+create table t1 (a int unsigned not null primary key) engine = rocksdb;
+insert into t1 values (1);
+commit;
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con2;
+begin work;
+insert into t1 values (5);
+insert into t1 values (6);
+update t1 set a = a + 1 where a = 1;
+connection con1;
+begin work;
+insert into t1 values (7);
+insert into t1 values (8);
+update t1 set a = a + 1 where a = 1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+select * from t1;
+a
+1
+commit;
+connection con2;
+select * from t1;
+a
+2
+5
+6
+commit;
+connection default;
+select * from t1;
+a
+2
+5
+6
+SET @@global.rocksdb_rollback_on_timeout = 0;
+show variables like 'rocksdb_rollback_on_timeout';
+Variable_name Value
+rocksdb_rollback_on_timeout OFF
+connection con2;
+begin work;
+insert into t1 values (9);
+insert into t1 values (10);
+update t1 set a = a + 1 where a = 2;
+connection con1;
+begin work;
+insert into t1 values (11);
+insert into t1 values (12);
+update t1 set a = a + 1 where a = 2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+select * from t1;
+a
+2
+5
+6
+11
+12
+commit;
+connection con2;
+select * from t1;
+a
+3
+5
+6
+9
+10
+commit;
+connection default;
+select * from t1;
+a
+3
+5
+6
+9
+10
+11
+12
+SET @@global.rocksdb_rollback_on_timeout = DEFAULT;
+drop table t1;
+disconnect con1;
+disconnect con2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_read_free.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_read_free.result
deleted file mode 100644
index 82609f46423..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/r/rpl_read_free.result
+++ /dev/null
@@ -1,321 +0,0 @@
-include/master-slave.inc
-Warnings:
-Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
-Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
-[connection master]
-drop table if exists t1;
-create procedure save_read_stats()
-begin
-select rows_requested into @rq from information_schema.table_statistics
-where table_schema=database() and table_name='t1';
-select variable_value into @rr from information_schema.global_status
-where variable_name='rocksdb_rows_read';
-select variable_value into @ru from information_schema.global_status
-where variable_name='rocksdb_rows_updated';
-select variable_value into @rd from information_schema.global_status
-where variable_name='rocksdb_rows_deleted';
-end//
-create procedure get_read_stats()
-begin
-select rows_requested - @rq as rows_requested from
-information_schema.table_statistics
-where table_schema=database() and table_name='t1';
-select variable_value - @rr as rows_read from
-information_schema.global_status
-where variable_name='rocksdb_rows_read';
-select variable_value - @ru as rows_updated from
-information_schema.global_status
-where variable_name='rocksdb_rows_updated';
-select variable_value - @rd as rows_deleted from
-information_schema.global_status
-where variable_name='rocksdb_rows_deleted';
-end//
-create table t1 (id int primary key, value int);
-insert into t1 values (1,1), (2,2), (3,3), (4,4);
-include/sync_slave_sql_with_master.inc
-
-# regular update/delete. With rocks_read_free_rpl_tables=.*, rocksdb_rows_read does not increase on slaves
-
-call save_read_stats();
-update t1 set value=value+1 where id=1;
-delete from t1 where id=4;
-select * from t1;
-id value
-1 2
-2 2
-3 3
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-0
-rows_read
-0
-rows_updated
-1
-rows_deleted
-1
-select * from t1;
-id value
-1 2
-2 2
-3 3
-
-# "rocks_read_free_rpl_tables=.*" makes "row not found error" not happen anymore
-
-include/stop_slave.inc
-delete from t1 where id in (2, 3);
-include/start_slave.inc
-call save_read_stats();
-update t1 set value=value+1 where id=3;
-delete from t1 where id=2;
-select * from t1;
-id value
-1 2
-3 4
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-0
-rows_read
-0
-rows_updated
-1
-rows_deleted
-1
-select * from t1;
-id value
-1 2
-3 4
-
-## tables without primary key -- read free replication should be disabled
-
-
-#no index
-
-drop table t1;
-create table t1 (c1 int, c2 int);
-insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
-include/sync_slave_sql_with_master.inc
-call save_read_stats();
-update t1 set c2=100 where c1=3;
-delete from t1 where c1 <= 2;
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-5
-rows_read
-5
-rows_updated
-1
-rows_deleted
-2
-select * from t1;
-c1 c2
-3 100
-4 4
-5 5
-
-#secondary index only
-
-drop table t1;
-create table t1 (c1 int, c2 int, index i(c1));
-insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
-include/sync_slave_sql_with_master.inc
-call save_read_stats();
-update t1 set c2=100 where c1=3;
-delete from t1 where c1 <= 2;
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-3
-rows_read
-3
-rows_updated
-1
-rows_deleted
-2
-select * from t1;
-c1 c2
-3 100
-4 4
-5 5
-
-## large row operations -- primary key modification, secondary key modification
-
-drop table t1;
-create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
-include/sync_slave_sql_with_master.inc
-call save_read_stats();
-
-#updating all seconary keys by 1
-
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-0
-rows_read
-0
-rows_updated
-10000
-rows_deleted
-0
-include/diff_tables.inc [master:t1, slave:t1]
-
-#updating all primary keys by 2
-
-call save_read_stats();
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-0
-rows_read
-0
-rows_updated
-10000
-rows_deleted
-0
-include/diff_tables.inc [master:t1, slave:t1]
-
-#updating secondary keys after truncating t1 on slave
-
-truncate table t1;
-call save_read_stats();
-update t1 set c2=c2+10;
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-0
-rows_read
-0
-rows_updated
-10000
-rows_deleted
-0
-include/diff_tables.inc [master:t1, slave:t1]
-
-#updating primary keys after truncating t1 on slave
-
-truncate table t1;
-call save_read_stats();
-update t1 set id2=id2+10;
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-0
-rows_read
-0
-rows_updated
-10000
-rows_deleted
-0
-include/diff_tables.inc [master:t1, slave:t1]
-
-#deleting half rows
-
-call save_read_stats();
-delete from t1 where id1 <= 5000;
-include/sync_slave_sql_with_master.inc
-call get_read_stats();
-rows_requested
-0
-rows_read
-0
-rows_updated
-0
-rows_deleted
-5000
-include/diff_tables.inc [master:t1, slave:t1]
-[on master]
-create table t2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
-insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
-include/sync_slave_sql_with_master.inc
-[on slave]
-delete from t2 where id <= 2;
-delete from u2 where id <= 2;
-[on master]
-update t2 set i2=100, value=100 where id=1;
-update u2 set i2=100, value=100 where id=1;
-[on slave]
-call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
-call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
-include/wait_for_slave_sql_error.inc [errno=1032]
-select count(*) from t2 force index(primary);
-count(*)
-2
-select count(*) from t2 force index(i1);
-count(*)
-1
-select count(*) from t2 force index(i2);
-count(*)
-2
-select * from t2 where id=1;
-id i1 i2 value
-1 1 100 100
-select i1 from t2 where i1=1;
-i1
-select i2 from t2 where i2=100;
-i2
-100
-select count(*) from u2 force index(primary);
-count(*)
-1
-select count(*) from u2 force index(i1);
-count(*)
-1
-select count(*) from u2 force index(i2);
-count(*)
-1
-select * from u2 where id=1;
-id i1 i2 value
-select i1 from u2 where i1=1;
-i1
-select i2 from u2 where i2=100;
-i2
-include/wait_for_slave_sql_to_start.inc
-
-# some tables with read-free replication on and some with it off
-# secondary keys have extra rows
-
-[on master]
-create table t3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-create table u3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
-insert into u3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
-include/sync_slave_sql_with_master.inc
-[on slave]
-update t3 set i1=100 where id=1;
-update u3 set i1=100 where id=1;
-[on master]
-delete from t3 where id=1;
-delete from u3 where id=1;
-include/sync_slave_sql_with_master.inc
-[on slave]
-select count(*) from t3 force index(primary);
-count(*)
-2
-select count(*) from t3 force index(i1);
-count(*)
-3
-select count(*) from t3 force index(i2);
-count(*)
-2
-select i1 from t3 where i1=100;
-i1
-100
-select count(*) from u3 force index(primary);
-count(*)
-2
-select count(*) from u3 force index(i1);
-count(*)
-2
-select count(*) from u3 force index(i2);
-count(*)
-2
-select i1 from u3 where i1=100;
-i1
-drop table t1, t2, t3, u2, u3;
-drop procedure save_read_stats;
-drop procedure get_read_stats;
-include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result
new file mode 100644
index 00000000000..8cdfa910739
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result
@@ -0,0 +1,56 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists t1;
+create table t0 (a int) engine=myisam;
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int) engine=myisam;
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+pk int primary key,
+kp1 int,
+kp2 int,
+col1 int,
+key (kp1,kp2)
+) engine=rocksdb;
+insert into t2 select a,a,a,a from t1;
+create table t3 like t2;
+insert into t3 select * from t2;
+include/sync_slave_sql_with_master.inc
+set global debug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+update t2 set col1=100 where kp1 between 1 and 3 and mod(kp2,2)=0;
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug = '';
+set sql_log_bin=0;
+delete from t2 where pk=2;
+delete from t2 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+include/sync_slave_sql_with_master.inc
+select * from t2 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 4
+set global debug= 'd,dbug.rocksdb.get_row_by_rowid';
+include/stop_slave.inc
+include/start_slave.inc
+update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0;
+call mtr.add_suppression("Deadlock found when trying to get lock");
+set debug_sync= 'now WAIT_FOR Reached';
+set global debug = '';
+set sql_log_bin=0;
+delete from t3 where pk=2;
+delete from t3 where pk=3;
+set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running';
+include/sync_slave_sql_with_master.inc
+select * from t3 where pk < 5;
+pk kp1 kp2 col1
+0 0 0 0
+1 1 1 1
+4 4 4 100
+drop table t0, t1, t2, t3;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
index eb23b71808b..eac329a24e7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
@@ -138,6 +138,9 @@ __system__ TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
__system__ TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
__system__ TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
__system__ TABLE_FACTORY::INDEX_TYPE #
+__system__ TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+__system__ TABLE_FACTORY::INDEX_SHORTENING #
+__system__ TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
__system__ TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
__system__ TABLE_FACTORY::CHECKSUM #
__system__ TABLE_FACTORY::NO_BLOCK_CACHE #
@@ -147,6 +150,7 @@ __system__ TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
__system__ TABLE_FACTORY::CAPACITY #
__system__ TABLE_FACTORY::NUM_SHARD_BITS #
__system__ TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+__system__ TABLE_FACTORY::MEMORY_ALLOCATOR #
__system__ TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
__system__ TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
__system__ TABLE_FACTORY::PERSISTENT_CACHE #
@@ -211,6 +215,9 @@ cf_t1 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
cf_t1 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
cf_t1 TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
cf_t1 TABLE_FACTORY::INDEX_TYPE #
+cf_t1 TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+cf_t1 TABLE_FACTORY::INDEX_SHORTENING #
+cf_t1 TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
cf_t1 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
cf_t1 TABLE_FACTORY::CHECKSUM #
cf_t1 TABLE_FACTORY::NO_BLOCK_CACHE #
@@ -220,6 +227,7 @@ cf_t1 TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
cf_t1 TABLE_FACTORY::CAPACITY #
cf_t1 TABLE_FACTORY::NUM_SHARD_BITS #
cf_t1 TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+cf_t1 TABLE_FACTORY::MEMORY_ALLOCATOR #
cf_t1 TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
cf_t1 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
cf_t1 TABLE_FACTORY::PERSISTENT_CACHE #
@@ -284,6 +292,9 @@ default TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
default TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
default TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
default TABLE_FACTORY::INDEX_TYPE #
+default TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+default TABLE_FACTORY::INDEX_SHORTENING #
+default TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
default TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
default TABLE_FACTORY::CHECKSUM #
default TABLE_FACTORY::NO_BLOCK_CACHE #
@@ -293,6 +304,7 @@ default TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
default TABLE_FACTORY::CAPACITY #
default TABLE_FACTORY::NUM_SHARD_BITS #
default TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+default TABLE_FACTORY::MEMORY_ALLOCATOR #
default TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
default TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
default TABLE_FACTORY::PERSISTENT_CACHE #
@@ -357,6 +369,9 @@ rev:cf_t2 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
rev:cf_t2 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
rev:cf_t2 TABLE_FACTORY::PIN_TOP_LEVEL_INDEX_AND_FILTER #
rev:cf_t2 TABLE_FACTORY::INDEX_TYPE #
+rev:cf_t2 TABLE_FACTORY::DATA_BLOCK_INDEX_TYPE #
+rev:cf_t2 TABLE_FACTORY::INDEX_SHORTENING #
+rev:cf_t2 TABLE_FACTORY::DATA_BLOCK_HASH_TABLE_UTIL_RATIO #
rev:cf_t2 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
rev:cf_t2 TABLE_FACTORY::CHECKSUM #
rev:cf_t2 TABLE_FACTORY::NO_BLOCK_CACHE #
@@ -366,6 +381,7 @@ rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
rev:cf_t2 TABLE_FACTORY::CAPACITY #
rev:cf_t2 TABLE_FACTORY::NUM_SHARD_BITS #
rev:cf_t2 TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+rev:cf_t2 TABLE_FACTORY::MEMORY_ALLOCATOR #
rev:cf_t2 TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
rev:cf_t2 TABLE_FACTORY::PERSISTENT_CACHE #
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result
index 407a8b103bd..29140f045e4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 10000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result
new file mode 100644
index 00000000000..60d9f69a398
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result
@@ -0,0 +1,31 @@
+create table mz(c int);
+affected rows: 0
+insert into mz values(1);
+affected rows: 1
+commit;
+affected rows: 0
+SET debug= '+d,abort_with_io_write_error';
+affected rows: 0
+set global binlog_error_action=1;
+affected rows: 0
+show session variables like 'debug';
+Variable_name Value
+debug d,abort_with_io_write_error
+affected rows: 1
+show global variables like 'binlog_error_action';
+Variable_name Value
+binlog_error_action ABORT_SERVER
+affected rows: 1
+show global variables like 'skip_core_dump_on_error';
+Variable_name Value
+skip_core_dump_on_error ON
+affected rows: 1
+# crash_during_update
+update mz set c=13;
+ERROR HY000: Binary logging not possible. Message: An error occurred during sync stage of the commit. 'binlog_error_action' is set to 'ABORT_SERVER'. Hence aborting the server.
+# server aborted
+Pattern "mysqld got signal 6" found
+# but no core written
+Pattern "Writing a core file" not found
+drop table mz;
+affected rows: 0
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/statistics.result b/storage/rocksdb/mysql-test/rocksdb/r/statistics.result
index 78344991360..579c4adc11d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/statistics.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/statistics.result
@@ -29,9 +29,9 @@ true
set global rocksdb_force_flush_memtable_now = true;
SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
table_name table_rows
-t1 100000
-t2 4999
-t3 4999
+t1 1000
+t2 1000
+t3 1000
SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
table_name data_length>0 index_length>0
t1 1 1
@@ -39,9 +39,9 @@ t2 1 1
t3 1 1
SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
table_name table_rows
-t1 100000
-t2 4999
-t3 4999
+t1 1000
+t2 1000
+t3 1000
SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
table_name data_length>0 index_length>0
t1 1 1
@@ -58,9 +58,9 @@ test.t5 analyze Error Table 'test.t5' doesn't exist
test.t5 analyze status Operation failed
SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema = DATABASE();
table_name table_rows
-t1 100000
-t2 4999
-t3 4999
+t1 1000
+t2 1000
+t3 1000
SELECT table_name, data_length>0, index_length>0 FROM information_schema.tables WHERE table_schema = DATABASE();
table_name data_length>0 index_length>0
t1 1 1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
index 7f31b4434f5..dcb66a2ab23 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
@@ -1,18 +1,16 @@
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb DATA DIRECTORY = '/foo/bar/data';
-ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options")
+ERROR HY000: Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
show warnings;
Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
Warning 1296 Got error 196 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB
-Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options")
-Warning 1030 Got error 140 "Wrong create options" from storage engine ROCKSDB
CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb INDEX DIRECTORY = '/foo/bar/index';
-ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options")
+ERROR HY000: Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
show warnings;
Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
Warning 1296 Got error 197 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB
-Error 1005 Can't create table `test`.`t1` (errno: 140 "Wrong create options")
-Warning 1030 Got error 140 "Wrong create options" from storage engine ROCKSDB
CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE (id)
(
PARTITION P0 VALUES LESS THAN (1000)
@@ -21,7 +19,12 @@ PARTITION P1 VALUES LESS THAN (2000)
DATA DIRECTORY = '/foo/bar/data/',
PARTITION P2 VALUES LESS THAN (MAXVALUE)
);
-ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options")
+ERROR HY000: Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
+show warnings;
+Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 196 "Unknown error 196")
+Warning 1296 Got error 196 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB
+Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory")
CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id)
(
PARTITION P0 VALUES LESS THAN (1000)
@@ -30,4 +33,9 @@ PARTITION P1 VALUES LESS THAN (2000)
INDEX DIRECTORY = '/foo/bar/data/',
PARTITION P2 VALUES LESS THAN (MAXVALUE)
);
-ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options")
+ERROR HY000: Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
+show warnings;
+Level Code Message
+Error 1005 Can't create table `test`.`t1` (errno: 197 "Unknown error 197")
+Warning 1296 Got error 197 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB
+Error 6 Error on delete of './test/t1.par' (Errcode: 2 "No such file or directory")
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result
new file mode 100644
index 00000000000..67f655b66d7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result
@@ -0,0 +1,620 @@
+#
+# table(hidden key)
+#
+CREATE TABLE t1 (
+a INT,
+b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (b) (
+PARTITION p0 VALUES LESS THAN (3),
+PARTITION p1 VALUES LESS THAN (6),
+PARTITION p2 VALUES LESS THAN MAXVALUE
+);
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+SELECT a FROM t1 WHERE b > 2;
+a
+2
+SELECT b from t1 where a != 3;
+b
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(secondary key)
+#
+CREATE TABLE t1(
+a INT,
+b INT,
+KEY (b)
+) ENGINE=ROCKSDB
+PARTITION BY HASH(a) PARTITIONS 3;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+3 8
+SELECT a FROM t1 WHERE b > 2;
+a
+3
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+3 8
+SELECT a FROM t1 WHERE b > 2;
+a
+3
+SELECT b from t1 where a != 3;
+b
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+6 8
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+8
+SELECT a,b FROM t1;
+a b
+4 1
+6 8
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+6 8
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+6 8
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+4
+8
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(primary key, auto increment)
+#
+CREATE TABLE t1(
+a INT NOT NULL AUTO_INCREMENT,
+b INT,
+PRIMARY KEY(a)
+) ENGINE=ROCKSDB
+PARTITION BY KEY() PARTITIONS 3;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+8
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+4
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(cf)
+#
+CREATE TABLE t1 (
+a INT,
+b INT,
+PRIMARY KEY (`a`, `b`) COMMENT 'testcomment'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(a) (
+PARTITION p0 VALUES IN (1, 4, 7),
+PARTITION p1 VALUES IN (2, 5, 8),
+PARTITION p2 VALUES IN (3, 6, 9)
+);
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+SELECT a FROM t1 WHERE b > 2;
+a
+2
+SELECT b from t1 where a != 3;
+b
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
+#
+# table(reverse cf)
+#
+CREATE TABLE t1 (
+a INT,
+b INT,
+PRIMARY KEY (`a`, `b`) COMMENT 'p0_cfname=rev:foo;p1_cfname=bar;p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(a) (
+PARTITION p0 VALUES IN (1, 4, 7),
+PARTITION p1 VALUES IN (2, 5, 8),
+PARTITION p2 VALUES IN (3, 6, 9)
+);
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+3 8
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+1 1
+2 4
+SELECT a FROM t1 WHERE b > 2;
+a
+2
+SELECT b from t1 where a != 3;
+b
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+a b
+1 1
+SELECT a FROM t1 WHERE b > 2;
+a
+SELECT b from t1 where a != 3;
+b
+1
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+6 8
+SELECT a FROM t1 WHERE b < 5;
+a
+4
+5
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+INSERT INTO t1(a,b) VALUES(7, 1);
+SELECT b from t1 WHERE a > 2;
+b
+1
+1
+4
+SELECT a,b FROM t1;
+a b
+4 1
+5 4
+7 1
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+INSERT INTO t1(a,b) VALUES(8, 4);
+SELECT a,b FROM t1;
+a b
+4 1
+7 1
+8 4
+SELECT b from t1 WHERE a < 9;
+b
+1
+1
+4
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1;
+a b
+8 4
+INSERT INTO t1(a,b) VALUES(9, 8);
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+a b
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+SELECT b FROM t1 WHERE a < 5;
+b
+1
+2
+3
+4
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+b
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result
new file mode 100644
index 00000000000..b0304af8bef
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result
@@ -0,0 +1,45 @@
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
+connect conn1, localhost, root,,test;
+connect conn2, localhost, root,,test;
+connection conn1;
+CREATE TABLE t_re (
+a INT, b INT, PRIMARY KEY (a)
+) ENGINE=ROCKSDB
+COMMENT 'ttl_duration=1';
+affected rows: 0
+set global rocksdb_debug_ttl_rec_ts = -13;
+affected rows: 0
+insert into t_re values (1,1);
+affected rows: 1
+insert into t_re values (2,2);
+affected rows: 1
+set global rocksdb_debug_ttl_rec_ts = 0;
+affected rows: 0
+commit;
+affected rows: 0
+set debug_sync='rocksdb.ttl_rows_examined SIGNAL parked WAIT_FOR go';
+affected rows: 0
+SELECT * FROM t_re;
+connection conn2;
+set debug_sync='now WAIT_FOR parked';
+affected rows: 0
+SHOW PROCESSLIST;
+Id User Host db Command Time State Info Progress
+### ### ### ### Query ### debug sync point: rocksdb.ttl_rows_examined SELECT * FROM t_re 0.000
+### ### ### ### Query ### init SHOW PROCESSLIST 0.000
+### ### ### ### Sleep ### NULL 0.000
+affected rows: 3
+set debug_sync='now SIGNAL go';
+affected rows: 0
+connection conn1;
+a b
+affected rows: 0
+set debug_sync='RESET';
+affected rows: 0
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+affected rows: 0
+drop table t_re;
+affected rows: 0
+disconnect conn1;
+disconnect conn2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result b/storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result
index 7397ff64ab1..3a8cf9ed21a 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result
@@ -34,7 +34,7 @@ id select_type table type possible_keys key key_len ref rows Extra
explain
select col1, col2 from t1 where col1 between -8 and 8;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range key1 key1 3 NULL # Using where; Using index
+1 SIMPLE t1 index key1 key1 6 NULL # Using where; Using index
select col1, col2 from t1 where col1 between -8 and 8;
col1 col2
0.3 2.5
@@ -46,7 +46,7 @@ insert into t1 values (10, -8.4, NULL, 'row2-with-null');
explain
select col1, col2 from t1 force index(key1) where col1 is null or col1 < -7;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range key1 key1 3 NULL # Using where; Using index
+1 SIMPLE t1 index key1 key1 6 NULL # Using where; Using index
select col1, col2 from t1 force index(key1) where col1 is null or col1 < -7;
col1 col2
NULL 0.9
@@ -92,7 +92,7 @@ test.t1 analyze status OK
explain
select col1, col2 from t1 force index(key1) where col1 between -800 and 800;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range key1 key1 7 NULL # Using where; Using index
+1 SIMPLE t1 index key1 key1 14 NULL # Using where; Using index
select col1, col2 from t1 force index(key1) where col1 between -800 and 800;
col1 col2
-700.002000 100.006000
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result
new file mode 100644
index 00000000000..8a4ee14c116
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result
@@ -0,0 +1,18 @@
+Checking direct reads
+CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `pk` int(11) NOT NULL DEFAULT 0,
+ `a` int(11) DEFAULT NULL,
+ `b` char(8) DEFAULT NULL,
+ PRIMARY KEY (`pk`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1, 1,'a');
+INSERT INTO t1 (a,b) VALUES (2,'b');
+set global rocksdb_force_flush_memtable_now=1;
+SELECT a,b FROM t1;
+a b
+1 a
+2 b
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
index f4da0b7cb58..085324481b8 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test
@@ -149,3 +149,23 @@ INSERT INTO t1 (a) VALUES (1);
UPDATE t1 SET pk = 3;
ALTER TABLE t1 AUTO_INCREMENT 2;
DROP TABLE t1;
+
+--echo #----------------------------------
+--echo # Issue #902 Debug assert in autoincrement with small field type
+--echo #----------------------------------
+
+SET auto_increment_increment=100, auto_increment_offset=10;
+CREATE TABLE t1(i INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB AUTO_INCREMENT=18446744073709551615;
+# ha_rocksdb::get_auto_increment would assert here
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+ALTER TABLE t1 AUTO_INCREMENT=1;
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+ALTER TABLE t1 AUTO_INCREMENT=18446744073709551615;
+# ha_rocksdb::get_auto_increment would assert here
+--error ER_AUTOINC_READ_FAILED
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.cnf b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf
index a76f1244bab..a76f1244bab 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test
new file mode 100644
index 00000000000..9b5c4571c19
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test
@@ -0,0 +1,3 @@
+let $trx_isolation = READ COMMITTED;
+--source blind_delete_without_tx_api.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf
new file mode 100644
index 00000000000..a76f1244bab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
+
+[mysqld.2]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test
new file mode 100644
index 00000000000..4369f6baa62
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test
@@ -0,0 +1,3 @@
+let $trx_isolation = REPEATABLE READ;
+--source blind_delete_without_tx_api.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.test b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc
index e5f70be4c3b..4f03695bf02 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc
@@ -5,6 +5,7 @@ source include/master-slave.inc;
connection master;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
set @save_rocksdb_blind_delete_primary_key=@@session.rocksdb_blind_delete_primary_key;
set @save_rocksdb_master_skip_tx_api=@@session.rocksdb_master_skip_tx_api;
@@ -43,6 +44,7 @@ SELECT count(*) FROM t1;
--source include/sync_slave_sql_with_master.inc
connection slave;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
SELECT count(*) FROM t1;
connection master;
@@ -109,8 +111,8 @@ call mtr.add_suppression("Slave: Can't find record in 't1'.*");
--source include/wait_for_slave_sql_error.inc
connection slave;
-set @save_rocksdb_read_free_rpl_tables=@@global.rocksdb_read_free_rpl_tables;
-set global rocksdb_read_free_rpl_tables="t.*";
+set @save_rocksdb_read_free_rpl=@@global.rocksdb_read_free_rpl;
+set global rocksdb_read_free_rpl=PK_SK;
START SLAVE;
connection master;
--source include/sync_slave_sql_with_master.inc
@@ -121,7 +123,7 @@ connection master;
# cleanup
connection slave;
-set global rocksdb_read_free_rpl_tables=@save_rocksdb_read_free_rpl_tables;
+set global rocksdb_read_free_rpl=@save_rocksdb_read_free_rpl;
connection master;
SET session rocksdb_blind_delete_primary_key=@save_rocksdb_blind_delete_primary_key;
SET session rocksdb_master_skip_tx_api=@save_rocksdb_master_skip_tx_api;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt
index ef6d0fd554a..a21608c7c1d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt
@@ -1,3 +1,4 @@
--rocksdb_default_cf_options=write_buffer_size=64k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:20
--rocksdb_debug_optimizer_n_rows=1000
--rocksdb_table_stats_sampling_pct=100
+--rocksdb_info_log_level=debug_level
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test
index a15e2a89693..dc2a0da506d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test
@@ -18,6 +18,7 @@ CREATE TABLE `linktable` (
) ENGINE=RocksDB DEFAULT COLLATE=latin1_bin;
--disable_query_log
+call mtr.add_suppression("LibRocksDB");
let $i = 1;
while ($i <= 10000) {
let $insert = INSERT INTO linktable VALUES($i, $i, $i, $i, 1, 1, $i, $i, $i);
@@ -33,9 +34,26 @@ select id1, id2, link_type, visibility, data, time, version from linktable FORCE
select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
# BF len 20
+
+--echo # MariaDB: we don't have optimizer_force_index_for_range, but we can use EITS
+--echo # to get the query plan we want.
+set @tmp_use_stat_tables= @@use_stat_tables;
+set use_stat_tables='preferably';
+analyze table linktable persistent for all;
+flush tables;
+explain select * from linktable;
+--echo # This must use range(id1_type2), key_len=24
+explain
+select id1, id2, link_type, visibility, data, time, version from linktable
+FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
+
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+# MariaDB: no support for optimizer_force_index_for_range:
+#set @tmp_force_index_for_range=@@optimizer_force_index_for_range;
+#set optimizer_force_index_for_range=on;
select id1, id2, link_type, visibility, data, time, version from linktable FORCE INDEX(`id1_type2`) where id1 = 100 and link_type = 1 and time >= 0 and time <= 9223372036854775807 order by time desc;
select case when variable_value-@c > 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+#set global optimizer_force_index_for_range=@tmp_force_index_for_range;
# BF len 13
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt
index efcd69ba5bf..4576d20f45b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt
@@ -1,3 +1,3 @@
--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}
---rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4};
+--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4};bf5_1={prefix_extractor=capped:4}
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
index 00968aebb62..11890dcfbaf 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test
@@ -56,6 +56,31 @@ insert into t4 values (1, 0xFFFF, 0xFFF, 12345);
--echo # This must not fail an assert:
select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc;
-drop table t1,t2,t3,t4;
+--echo #
+--echo # Issue #881: Issue #809 still occurs for reverse scans on forward cfs
+--echo #
+
+# The same as t1 above but uses forward-ordered column family:
+
+create table t5 (
+ id1 bigint not null,
+ id2 bigint not null,
+ id3 varchar(100) not null,
+ id4 int not null,
+ id5 int not null,
+ value bigint,
+ value2 varchar(100),
+ primary key (id1, id2, id3, id4) COMMENT 'bf5_1'
+) engine=ROCKSDB;
+
+insert into t5 select * from t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # An index scan starting from the end of the table:
+explain
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1;
+drop table t1,t2,t3,t4,t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
index 0db5e6d9cc4..b1afc5b2f9d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
@@ -5,6 +5,7 @@
--source include/big_test.inc
--let pk_cf=cf1
+--let pk_cf_name=cf1
--let data_order_desc=0
--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
index 3f085269365..0409784811f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
@@ -53,11 +53,31 @@ INSERT INTO t1 VALUES(1);
INSERT INTO t1 VALUES(2);
INSERT INTO t1 VALUES(20);
INSERT INTO t1 VALUES(21);
+let $ID = `SELECT connection_id()`;
--connection default
--disconnect con1
SELECT * FROM t1;
+--disable_parsing
+# MariaDB: no support for $RPC_PROTOCOL
+if (`SELECT $RPC_PROTOCOL > 0`) {
+ # for --rpc_protocol mode wait for the background detached session to
+ # go away
+ let $wait_condition =
+ SELECT COUNT(*) = 0
+ FROM information_schema.srv_sessions
+ WHERE id = $ID;
+ --source include/wait_condition.inc
+}
+
+if (`SELECT $RPC_PROTOCOL = 0`) {
+ # for non --rpc_protocol mode simply wait until the number of sessions
+ # returns to earlier levels
+ --source include/wait_until_count_sessions.inc
+}
+--enable_parsing
+# MariaDB:
--source include/wait_until_count_sessions.inc
# Note: in MariaDB, session count will be decremented *before*
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
index 67d68ac7a2d..f011964db34 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
@@ -4,6 +4,7 @@
--source include/big_test.inc
--let pk_cf=rev:cf1
+--let pk_cf_name=cf1
--let data_order_desc=0
--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
index 7110fe5f1d7..37f19a39564 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
@@ -4,6 +4,7 @@
--source include/big_test.inc
--let pk_cf=rev:cf1
+--let pk_cf_name=cf1
--let data_order_desc=1
--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
index 6c6e51a2a51..4f3ffd23bd9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
@@ -4,6 +4,7 @@
--source include/big_test.inc
--let pk_cf=cf1
+--let pk_cf_name=cf1
--let data_order_desc=1
--source ../include/bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc
new file mode 100644
index 00000000000..1f5c9fbb3f2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc
@@ -0,0 +1,213 @@
+--source include/have_rocksdb.inc
+
+--source ../include/bypass_create_table.inc
+
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+ no_bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+
+SELECT /*+bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /* +bypassabc*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*aaaaaaaaabbbbbbbbb*/ id1,id2,id1_type,id2_type,data,version
+from link_table WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+byp*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+bypw*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*-b*/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /**/ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+
+--echo # Point query
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2, 3, 4) and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2 IN (2) and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1) and id2 IN (2) and link_type=3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1 IN (1, 2) and id2 IN (2, 3, 4) and link_type=3;
+
+--echo # Prefix range query
+
+--echo # Prefix range query with SK
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME DESC LIMIT 10;
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 3 AND time = 10
+ORDER BY TIME ASC LIMIT 10;
+
+--echo # Prefix range query with SK with limits
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 0,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 1,0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 5,0;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,10;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,5;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,1;
+
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (id1_type)
+WHERE link_type = 3 AND id1 = 1 AND visibility = 4 AND time >= 10
+ORDER BY TIME DESC LIMIT 10,0;
+
+--echo # Prefix range query with PK
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+SELECT /*+ bypass */ id1, id2, link_type FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+
+--echo # Prefix range query with PK + value
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 DESC;
+SELECT /*+ bypass */ id1, id2, link_type, visibility, data, time, version
+FROM link_table FORCE INDEX (PRIMARY)
+WHERE link_type=3 and id1=1 ORDER BY id2 ASC;
+
+--echo # Transaction
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+UPDATE link_table set data="bcd" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+COMMIT;
+
+BEGIN;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+UPDATE link_table set data="cde" WHERE id1=1 and id2=2 and link_type = 3;
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+ROLLBACK;
+
+SELECT /*+ bypass */ id1,id2,id1_type,id2_type,data,version from link_table
+WHERE id1=1 and id2=2 and link_type=3;
+
+--echo # Data types
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3";
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1="1";
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=True;
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=b'1';
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=x'01';
+SELECT /*+ bypass */ id1 FROM link_table where link_type="3" AND id1=NULL;
+
+DROP TABLE count_table;
+DROP TABLE link_table;
+DROP TABLE link_table3;
+DROP TABLE link_table2;
+DROP TABLE id_table;
+DROP TABLE node_table;
+DROP TABLE link_table5;
+DROP TABLE link_table6;
+DROP TABLE link_table4;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test
new file mode 100644
index 00000000000..51064356de7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test
@@ -0,0 +1,3 @@
+--source include/have_rocksdb.inc
+
+--source bypass_select_basic.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt
new file mode 100644
index 00000000000..81bc90b0531
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt
@@ -0,0 +1,3 @@
+--rocksdb_default_cf_options=write_buffer_size=128m;target_file_size_base=32m;max_bytes_for_level_base=512m;level0_file_num_compaction_trigger=4;level0_slowdown_writes_trigger=20;level0_stop_writes_trigger=30;max_write_buffer_number=4;compression_per_level=kLZ4Compression;bottommost_compression=kZSTD;compression_opts=-14:6:0;block_based_table_factory={cache_index_and_filter_blocks=1;filter_policy=bloomfilter:10:false;whole_key_filtering=0};prefix_extractor=capped:12;level_compaction_dynamic_level_bytes=true;optimize_filters_for_hits=true;memtable_prefix_bloom_size_ratio=0.039;max_compaction_bytes=402653184;report_bg_io_stats=true;compaction_pri=kMinOverlappingRatio;soft_pending_compaction_bytes_limit=20480000000
+--rocksdb_override_cf_options=cf_assoc={prefix_extractor=capped:28};cf_assoc_count={prefix_extractor=capped:20};rev:cf_assoc_id1_type={prefix_extractor=capped:20};cf_fbobj_type_id={prefix_extractor=capped:16};cf_assoc_disagg={prefix_extractor=capped:20};__system__={write_buffer_size=16m};
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test
new file mode 100644
index 00000000000..51064356de7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test
@@ -0,0 +1,3 @@
+--source include/have_rocksdb.inc
+
+--source bypass_select_basic.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test b/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test
new file mode 100644
index 00000000000..9afe562f114
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/check_flags.test
@@ -0,0 +1,117 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
+
+connect (conn1, localhost, root,,);
+--let $conn1_id = `SELECT CONNECTION_ID()`
+connection default;
+
+CREATE TABLE t1 (id INT, value INT, KEY (id), KEY (value)) ENGINE=ROCKSDB;
+CREATE TABLE t2 (id INT, value INT) ENGINE=ROCKSDB;
+CREATE TABLE t3 (id INT, kp1 INT, PRIMARY KEY (id), KEY(kp1)) ENGINE=ROCKSDB COMMENT='ttl_duration=1';
+
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4), (5,5);
+INSERT INTO t2 SELECT * FROM t1;
+INSERT INTO t3 SELECT * FROM t1;
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi SIGNAL parked WAIT_FOR go';
+send SELECT value FROM t1 WHERE value = 3;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_rmi_scan SIGNAL parked WAIT_FOR go';
+send SELECT DISTINCT(id) FROM t1 WHERE value = 5 AND id IN (1, 3, 5);
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_inwd SIGNAL parked WAIT_FOR go';
+send SELECT value FROM t1 WHERE value > 3;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_rnwd SIGNAL parked WAIT_FOR go';
+send SELECT id FROM t2;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+set debug_sync='RESET';
+
+
+connection conn1;
+set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go';
+send SELECT kp1 FROM t3 ORDER BY kp1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+--echo KILL QUERY \$conn1_id;
+--disable_query_log
+eval KILL QUERY $conn1_id;
+--enable_query_log
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+--error ER_QUERY_INTERRUPTED
+--reap
+
+connection default;
+--disconnect conn1
+
+set debug_sync='RESET';
+
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+
+DROP TABLE t1;
+DROP TABLE t2;
+DROP TABLE t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test
index c2058474b01..963f6c247fa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test
@@ -1,5 +1,8 @@
--source "include/have_rocksdb.inc"
--source "include/have_log_bin.inc"
+# Don't run this with --rpc_protocol because it is doing its own work with
+# the RPC protocol
+--source "include/not_rpc_protocol.inc"
#
# This test was created because 2pc transactions were failing in MyRocks
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test
index c5650359d8c..3ef35cb2633 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test
@@ -7,7 +7,7 @@ USE test;
CREATE TABLE mysql_table (a INT) ENGINE=ROCKSDB;
-- error ER_BLOCK_NO_PRIMARY_KEY
CREATE TABLE IF NOT EXISTS mysql_table_2 (a INT) ENGINE=ROCKSDB;
--- error ER_BLOCK_NO_PRIMARY_KEY
+-- error ER_TABLE_MUST_HAVE_COLUMNS
CREATE TABLE mysql_table_no_cols ENGINE=ROCKSDB;
CREATE TABLE mysql.mysql_table_2 (a INT) ENGINE=ROCKSDB;
@@ -35,10 +35,29 @@ DROP INDEX `PRIMARY` ON mysql_primkey4;
ALTER TABLE mysql.mysql_table ADD PRIMARY KEY (a);
ALTER TABLE mysql.mysql_table DROP PRIMARY KEY;
+SET default_storage_engine=ROCKSDB;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE mysql_noeng(a INT, b INT);
+
+# Disable no_engine_substitution
+SET sql_mode="";
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE mysql_noeng_sub(a INT, b INT) ENGINE=BOGUS_ENGINE;
+
+CREATE TABLE mysql_primkey5 LIKE mysql_primkey;
+
+SET @@global.block_create_no_primary_key = false;
+CREATE TABLE mysql_no_primkey (a INT) ENGINE=ROCKSDB;
+SET @@global.block_create_no_primary_key = true;
+-- error ER_BLOCK_NO_PRIMARY_KEY
+CREATE TABLE mysql_block_no_primkey LIKE mysql_no_primkey;
+
DROP TABLE mysql_primkey;
DROP TABLE mysql_primkey2;
DROP TABLE mysql_primkey3;
DROP TABLE mysql_primkey4;
+DROP TABLE mysql_primkey5;
+DROP TABLE mysql_no_primkey;
USE mysql;
DROP TABLE mysql_table;
DROP TABLE mysql_table_2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/delete_before_lock.test b/storage/rocksdb/mysql-test/rocksdb/t/delete_before_lock.test
deleted file mode 100644
index 93a9d1adaf9..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/t/delete_before_lock.test
+++ /dev/null
@@ -1,36 +0,0 @@
---source include/have_rocksdb.inc
---source include/have_debug_sync.inc
-
-# This is a test case to reproduce https://github.com/facebook/mysql-5.6/issues/162
-# Expected output of the last select for update was (1,2,100) and (1,3,100), but
-# currently it returns (1,2,1) and (1,3,1), which must be fixed.
-
-connect (con, localhost, root,,);
-connection default;
-
---disable_warnings
-set debug_sync='RESET';
-drop table if exists t1;
---enable_warnings
-
-create table t1 (id1 int, id2 int, value int, primary key (id1, id2)) engine=rocksdb;
-insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1), (2, 2, 2);
-
-connection con;
-set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
-send update t1 set value=100 where id1=1;
-
-connection default;
-set debug_sync='now WAIT_FOR parked';
-delete from t1 where id1=1 and id2=1;
-set debug_sync='now SIGNAL go';
-
-connection con;
-reap;
-select * from t1 where id1=1 for update;
-
-# Cleanup
-connection default;
-disconnect con;
-set debug_sync='RESET';
-drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
index 72cf650ca3e..b82c2207117 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
+++ b/storage/rocksdb/mysql-test/rocksdb/t/disabled.def
@@ -25,6 +25,18 @@ create_no_primary_key_table: MariaDB doesn't have --block_create_no_primary_key
explicit_snapshot: MariaDB doesn't support Shared/Explicit snapshots
percona_nonflushing_analyze_debug : Requires Percona Server's Non-flushing ANALYZE feature
com_rpc_tx : Requires connection attributes and detached sessions
+mysqlbinlog_blind_replace: requires @@enable_blind_replace support
+optimize_myrocks_replace_into_base: requires @@enable_blind_replace support
+optimize_myrocks_replace_into_lock: requires @@enable_blind_replace support
+rocksdb.skip_core_dump_on_error: requires @@binlog_error_action support
+bypass_select_basic_bloom : Query bypass is not supported
+bypass_select_basic : Query bypass is not supported
+
+rocksdb_read_free_rpl : Read-Free replication is not supported
+rocksdb_read_free_rpl_stress : Read-Free replication is not supported
+
+blind_delete_rr : Read-Free replication is not supported
+blind_delete_rc : Read-Free replication is not supported
##
## Tests that do not fit MariaDB's test environment. Upstream seems to test
@@ -60,7 +72,6 @@ ddl_high_priority: Needs fractional @@lock_wait_timeout
deadlock_tracking : Needs SHOW ENGINE ROCKSDB TRANSACTION STATUS
bytes_written: Needs I_S.TABLE_STATISTICS.IO_WRITE_BYTES
trx_info_rpl : MariaRocks: @@rpl_skip_tx_api doesn't work, yet.
-rpl_read_free: MDEV-10976
lock_wait_timeout_stats: MDEV-13404
rpl_row_triggers : Requires read-free slave.
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test b/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test
new file mode 100644
index 00000000000..1817bc06fc3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test
@@ -0,0 +1,97 @@
+--source include/have_rocksdb.inc
+
+--source include/have_debug.inc
+--source include/not_valgrind.inc
+
+connect (conn1, localhost, root,,test);
+
+create table t1 (
+ pk int not null primary key,
+ col1 varchar(10)
+) engine=rocksdb;
+
+insert into t1 values (1,1),(2,2),(3,3);
+
+connection conn1;
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+send select * from t1 where pk=1;
+
+--echo # testing unclean shutdown on stuck instance
+connection default;
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where info = 'select * from t1 where pk=1';
+--source include/wait_condition.inc
+--echo # Run shutdown sql command with forcing kill (exit code 127)
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2006,2013
+shutdown 1;
+--source include/wait_until_disconnected.inc
+
+--echo # verifying exit code is printed
+let $error_log=$MYSQLTEST_VARDIR/log/testlog.err;
+let SEARCH_FILE=$error_log;
+--echo # restart the server
+--exec echo "restart:--log-error=$error_log" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--error 2006,2013
+shutdown 230;
+--source include/wait_until_disconnected.inc
+let SEARCH_PATTERN=COM_SHUTDOWN received from host/user = localhost/root, exit code 230;
+--source include/search_pattern_in_file.inc
+
+--echo # restart the server
+--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo # verifying SHUTDOWN is refused if exit code > 255
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 256;
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 10000;
+
+--echo # verifying SHUTDOWN is refused if instances are not read only
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 0 read_only;
+--error ER_UNKNOWN_ERROR
+SHUTDOWN 127 read_only;
+--error 2006,2013
+SHUTDOWN 127;
+--source include/wait_until_disconnected.inc
+
+--echo # restart the server
+--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+connect (conn2, localhost, root,,test);
+
+connection conn2;
+set session debug= "+d,myrocks_busy_loop_on_row_read";
+send select * from t1 where pk=1;
+
+connection default;
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where info = 'select * from t1 where pk=1';
+--source include/wait_condition.inc
+
+SET GLOBAL read_only=1;
+--echo # verifying SHUTDOWN read_only works with read_only instance
+--echo # Run shutdown sql command with forcing kill (exit code 127)
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2006,2013
+shutdown 255 read_only;
+--source include/wait_until_disconnected.inc
+
+--echo # restart the server
+--exec echo "restart:" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+disconnect conn1;
+disconnect conn2;
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt
new file mode 100644
index 00000000000..ba9364e1523
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=1000
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test
new file mode 100644
index 00000000000..a9c44a71edd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test
@@ -0,0 +1,8 @@
+--source include/have_debug.inc
+set global debug="+d,force_group_by";
+
+let $engine=RocksDB;
+--source include/group_min_max.inc
+
+set global debug="-d,force_group_by";
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test b/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test
new file mode 100644
index 00000000000..0d0fad2e5fa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test
@@ -0,0 +1,15 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (a INT, b CHAR(8), KEY ab(a, b)) ENGINE=rocksdb DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_bin;
+INSERT INTO t1 (a,b) VALUES (76,'bar');
+INSERT INTO t1 (a,b) VALUES (35,'foo');
+INSERT INTO t1 (a,b) VALUES (77,'baz');
+
+SET debug_dbug="+d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+
+SELECT b FROM t1 FORCE INDEX(ab) WHERE a=35;
+
+
+SET debug_dbug="-d,dbug.rocksdb.HA_EXTRA_KEYREAD";
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test b/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test
index b2f37a07999..1332fe143d0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test
@@ -4,12 +4,6 @@
# INSERT statements for tables with keys
#
-##################################################
-# TODO:
-# A part of the test is disabled because currently
-# unique indexes are not supported
-##################################################
-
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
@@ -24,12 +18,6 @@ INSERT INTO t1 (a,b) VALUES (3,'a'),(0,'');
SELECT a,b FROM t1;
DROP TABLE t1;
---echo #----------------------------------------
---echo # UNIQUE KEYS are not supported currently
---echo #-----------------------------------------
-
---disable_parsing
-
CREATE TABLE t1 (a INT, b CHAR(8), pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a)) ENGINE=rocksdb;
INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e');
INSERT INTO t1 (a,b) VALUES (100,'a'), (6,'f');
@@ -67,9 +55,11 @@ SELECT a,b FROM t1;
--error ER_DUP_ENTRY
INSERT INTO t1 (a,b) VALUES (101,'x'),(101,'x');
-DROP TABLE t1;
---enable_parsing
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
@@ -91,3 +81,89 @@ SELECT a,b FROM t1;
DROP TABLE t1;
+#
+# INSERT on DUPLICATE KEY UPDATE with multiple keys
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with multiple keys
+--echo
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+#
+# INSERT on DUPLICATE KEY UPDATE with secondary key
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with secondary key
+--echo
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+
+--sorted_result
+SELECT a,b,c,pk FROM t1;
+
+DROP TABLE t1;
+
+--echo
+--echo Disable caching and see if it still functions properly
+--echo
+SELECT @@rocksdb_enable_insert_with_update_caching;
+SET GLOBAL rocksdb_enable_insert_with_update_caching=0;
+SELECT @@rocksdb_enable_insert_with_update_caching;
+
+#
+# INSERT on DUPLICATE KEY UPDATE with multiple keys
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with multiple keys
+--echo
+CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (3,'a'), (4,'a'), (5,'a'), (6,'a'), (7,'a'), (8,'a'), (9,'a'), (10,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a'), (1,'a'), (2,'a') ON DUPLICATE KEY UPDATE b = CONCAT(b, 'a');
+
+--sorted_result
+SELECT a,b FROM t1;
+
+DROP TABLE t1;
+
+#
+# INSERT on DUPLICATE KEY UPDATE with secondary key
+#
+--echo
+--echo INSERT on DUPLICATE KEY UPDATE with secondary key
+--echo
+CREATE TABLE t1 (a INT, b CHAR(8), c INT DEFAULT 0, pk INT AUTO_INCREMENT PRIMARY KEY, UNIQUE INDEX(a,b)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a');
+INSERT INTO t1 (a,b) VALUES (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a'), (1,'a') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (3,'c'), (4,'d'), (5,'e'), (6,'f'), (7,'g'), (8,'h'), (9,'i'), (10,'j') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b'), (1,'a'), (2,'b') ON DUPLICATE KEY UPDATE c = c + 1;
+INSERT INTO t1 (a,b) VALUES (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c'), (1,'a'), (2,'c') ON DUPLICATE KEY UPDATE c = c + 1;
+
+--sorted_result
+SELECT a,b,c,pk FROM t1;
+
+DROP TABLE t1;
+
+--echo
+--echo Cleanup
+--echo
+SET GLOBAL rocksdb_enable_insert_with_update_caching=1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue884.test b/storage/rocksdb/mysql-test/rocksdb/t/issue884.test
new file mode 100644
index 00000000000..6bf3e5177f2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue884.test
@@ -0,0 +1,43 @@
+--source include/have_rocksdb.inc
+
+create table test (
+ a bigint(20) not null,
+ b bigint(20) not null,
+ c varchar(500) not null,
+ d bigint(20) not null,
+ e bigint(20) not null,
+ f varchar(500) not null,
+ g varchar(500) not null,
+ h varchar(500) not null,
+ i varchar(1000) not null,
+ j varchar(16384) not null,
+ k varchar(200) not null,
+ l varchar(500) not null,
+ m varchar(100) not null,
+ n bigint(20) not null,
+ primary key (a, b, m, c(100), l(100), d, e, f(100), g(100), h(100), n),
+ key n (n),
+ key d (d, a)
+) engine = rocksdb default charset = latin1;
+
+--disable_query_log
+let $i = 1000;
+while ($i) {
+ --eval insert into test values (10, 1, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 2, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 3, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 4, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ --eval insert into test values (10, 5, "i", $i / 100, $i, "f", "g", "h", "i", "j", "k", "l", "m", $i);
+ dec $i;
+}
+set global rocksdb_force_flush_memtable_now = true;
+analyze table test;
+--enable_query_log
+
+--replace_column 9 #
+explain
+select * from test where d = 10 and a = 10 and b = 2;
+select * from test where d = 10 and a = 10 and b = 2;
+
+
+drop table test;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue896.test b/storage/rocksdb/mysql-test/rocksdb/t/issue896.test
new file mode 100644
index 00000000000..ba57fb99832
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue896.test
@@ -0,0 +1,17 @@
+# issue 896 : Segmentation fault in myrocks::Rdb_string_reader::read
+--source include/have_rocksdb.inc
+
+CREATE TABLE `t1` (
+`a` bigint(20) NOT NULL,
+`b` varchar(10) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
+`u` bigint(20) unsigned NOT NULL,
+`d` bigint(20) DEFAULT NULL,
+PRIMARY KEY (`a`,`b`),
+KEY `d` (`d`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='ttl_duration=1000;ttl_col=u';
+INSERT INTO t1 VALUES (100, 'aaabbb', UNIX_TIMESTAMP(), 200);
+--replace_column 9 #
+EXPLAIN SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+--echo # segfault here without the fix
+SELECT COUNT(*) FROM t1 FORCE INDEX(d);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue900.test b/storage/rocksdb/mysql-test/rocksdb/t/issue900.test
new file mode 100644
index 00000000000..c420d418c20
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue900.test
@@ -0,0 +1,13 @@
+--source include/have_rocksdb.inc
+
+# Issue 900 : Segmentation fault in myrocks::Rdb_string_reader::read
+CREATE TABLE t1(c1 VARCHAR(1) CHARACTER SET 'utf8' COLLATE 'utf8_bin', c2 YEAR, c3 REAL(1,0) UNSIGNED, PRIMARY KEY(c1)) ENGINE=RocksDB;
+INSERT INTO t1 VALUES(0,'0','0');
+INSERT INTO t1 VALUES('{0}','0','0');
+INSERT INTO t1 VALUES('1','0','1');
+# Would segfault here
+--error ER_DUP_ENTRY
+ALTER TABLE t1 ADD INDEX(c3), ADD UNIQUE (c3);
+--error ER_KEY_DOES_NOT_EXITS
+SELECT c3 FROM t1 FORCE INDEX(c3) ORDER BY c3;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt
new file mode 100644
index 00000000000..d77439930fd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}
+--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:12};
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test
new file mode 100644
index 00000000000..2cced2a1d7a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test
@@ -0,0 +1,29 @@
+#
+# Issue #878: Descending scans from reverse column families return no results
+# due to iterator bounds
+#
+
+create table t (i int primary key) engine=rocksdb;
+
+let $cond=1;
+while ($cond)
+{
+ --disable_query_log
+ truncate table t;
+ --enable_query_log
+ let $cond=`select RIGHT(HEX(index_number), 2) != "FD" from information_schema.rocksdb_ddl where table_name = 't'`;
+}
+
+# Index id is now at FD. Create a table with primary and secondary key, so
+# that the secondary key index id ends in 0xFF.
+
+drop table t;
+create table t (i int primary key, j int, key(j) comment 'rev:bf5_2') engine=rocksdb;
+select RIGHT(HEX(index_number), 2) from information_schema.rocksdb_ddl where table_name = 't';
+
+insert into t values (1, 1);
+
+select j from t order by j asc;
+select j from t order by j desc;
+
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt
new file mode 100644
index 00000000000..418e4c3f056
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test
new file mode 100644
index 00000000000..2b033023b2a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test
@@ -0,0 +1,62 @@
+#
+# This test is intended to check that when blind replace is enabled,
+# mysqlbinlog is able to pass this information in the captured binlog
+# events and we are able to reapply such events
+#
+
+--source include/have_log_bin.inc
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+reset master;
+set GLOBAL binlog_format= 'ROW';
+SET GLOBAL enable_blind_replace=ON;
+set binlog_format=row;
+
+create table t5 (c1 int primary key, c2 int);
+insert into t5 values (1, 1);
+insert into t5 values (2, 2);
+insert into t5 values (3, 3);
+select * from t5;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t5 values (1, 11);
+replace into t5 values (2, 22);
+replace into t5 values (3, 33);
+
+# Ensure that this was a blind replace
+select case when variable_value-@c = 3 then 'true' else 'false' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t5;
+--source include/show_binlog_events.inc
+
+flush logs;
+
+# Capture binlog events using mysqlbinlog
+let $MYSQLD_DATADIR= `select @@datadir`;
+--let $log_file_name = query_get_value("SHOW BINARY LOGS", Log_name, 1)
+--exec $MYSQL_BINLOG $MYSQLD_DATADIR/$log_file_name > $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output
+
+# Drop the table. This will be recreated when we reapply binlog events
+drop table t5;
+reset master;
+
+# Now replay the binlog events
+--echo Replaying binlog events containing blind replace statements should work
+--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output
+select * from t5;
+--source include/show_binlog_events.inc
+
+drop table t5;
+reset master;
+
+# Replay the same binlog events again, but with blind_replace turned off
+--echo Replaying the same binlog events with blind replace disabled should work
+--echo The server should internally convert such events into updates
+SET GLOBAL enable_blind_replace=OFF;
+--exec $MYSQL < $MYSQLTEST_VARDIR/tmp/mysqlbinlog-output
+select * from t5;
+
+--source include/show_binlog_events.inc
+
+set GLOBAL binlog_format=DEFAULT;
+SET GLOBAL enable_blind_replace=DEFAULT;
+drop table t5;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
index 4947ffb59b8..473bebdda89 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test
@@ -9,7 +9,7 @@ connect (con1,localhost,root,,);
connect (con2,localhost,root,,);
connection con1;
-create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4)) engine=rocksdb;
+create table r1 (id1 int, id2 int, id3 varchar(100), id4 int, value1 int, value2 int, value3 int, value4 int, primary key (id1, id2, id3, id4), KEY (value1, value2, value3)) engine=rocksdb;
insert into r1 values (1,1,1,1,1,1,1,1);
insert into r1 values (1,1,1,2,2,2,2,2);
insert into r1 values (1,1,2,1,3,3,3,3);
@@ -32,8 +32,12 @@ BEGIN;
insert into r1 values (5,5,5,5,5,5,5,5);
update r1 set value1=value1+100 where id1=1 and id2=1 and id3='1';
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load test
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
+--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --rocksdb --order-by-primary-desc --rocksdb_bulk_load --rocksdb_bulk_load_allow_sk test
+
rollback;
connection con1;
@@ -44,11 +48,13 @@ source include/search_pattern_in_file.inc;
set @save_default_storage_engine=@@global.default_storage_engine;
SET GLOBAL default_storage_engine=rocksdb;
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key test
source include/search_pattern_in_file.inc;
# Sanity test mysqldump when the --innodb-stats-on-metadata is specified (no effect)
--echo ==== mysqldump with --innodb-stats-on-metadata ====
+--replace_regex /MASTER_LOG_POS=[0-9]+/MASTER_LOG_POS=BINLOG_START/
--exec ASAN_OPTIONS="detect_leaks=0" $MYSQL_DUMP --skip-comments --single-transaction --master-data=2 --print-ordering-key --innodb-stats-on-metadata test
# testing mysqldump work with statement based binary logging
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test
new file mode 100644
index 00000000000..b37f532a21e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test
@@ -0,0 +1,96 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+
+#
+# case 1: table only with primary key, support replace blind write
+#
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+#
+# case 2: table only with primary key but with trigger, not support replace blind write
+#
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+create trigger trg before insert on t1 for each row set @a:=1;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+#
+# case 3: table without primary key, not support replace blind write
+#
+
+create table t1(c1 int,c2 int) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+create table t1(c1 int,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+
+#
+# case 4: table with primary key and secondary key, not support replace blind write
+#
+create table t1(c1 int primary key,c2 int unique) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+create table t1(c1 int primary key,c2 int, key idx1(c2)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+
+
+#
+# case 5: Disabling blind replace through enable_blind_replace should work
+SET GLOBAL enable_blind_replace=OFF;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+select case when variable_value-@c > 1 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+drop table t1;
+
+SET GLOBAL enable_blind_replace=DEFAULT;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test
new file mode 100644
index 00000000000..6cce429a5de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test
@@ -0,0 +1,88 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+# Enable blind replace
+SET GLOBAL enable_blind_replace=ON;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+#
+# case 1: update is blocked by replace into
+#
+connection con1;
+SELECT @@global.enable_blind_replace;
+begin;
+replace into t1 values(1,11);
+
+
+connection con2;
+SELECT @@global.enable_blind_replace;
+begin;
+send update t1 set c2=22 where c1=1;
+
+
+connection default;
+# Check that the above update is blocked
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where state = 'Waiting for row lock' and
+ info = 'update t1 set c2=22 where c1=1';
+--source include/wait_condition.inc
+
+
+connection con1;
+commit;
+
+connection con2;
+--echo # Reap update.
+--reap
+commit;
+select * from t1;
+
+
+#
+# case 2: replace into is blocked by update
+#
+
+connection con1;
+SELECT @@global.enable_blind_replace;
+begin;
+update t1 set c2=55 where c1=1;
+
+connection con2;
+SELECT @@global.enable_blind_replace;
+begin;
+send replace into t1 values(1,66);
+
+
+connection default;
+# Check that the above replace into is blocked
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where state = 'Waiting for row lock' and
+ info = 'replace into t1 values(1,66)';
+--source include/wait_condition.inc
+
+
+connection con1;
+commit;
+
+connection con2;
+--echo # Reap replace into.
+--reap
+commit;
+select * from t1;
+
+connection default;
+drop table t1;
+
+disconnect con1;
+disconnect con2;
+
+# Disable blind replace
+SET GLOBAL enable_blind_replace=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test
index 161f7b566f5..8fa43e15827 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test
@@ -42,6 +42,10 @@ SET @@global.rocksdb_update_cf_options = 'cf1={prefix_extractor=capped:26};';
# Restart no longer needed
SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+# set cf_options for non-existent cf2, cf2 should be created automatically
+SET @@global.rocksdb_update_cf_options = 'cf2={prefix_extractor=capped:28};';
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
# Satisfies can_use_bloom_filter (4+8+8+8), but can't use because the old SST
# files have old prefix extractor
select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
index 13f1bd68a72..96fe1a90bc9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -1114,7 +1114,7 @@ update t1 set a = sleep(100) where pk = 1;
--connect (con1,localhost,root,,)
-let $wait_condition= select State='User sleep' from information_schema.processlist where id=$con_id;
+let $wait_condition= select State='User sleep' from information_schema.processlist where id=$con_id/* or srv_id=$con_id*/;
--source include/wait_condition.inc
--echo kill query \$con_id;
@@ -1251,14 +1251,15 @@ drop table t0, t1;
--echo #
--echo # Check status variables
+--echo # NOTE: We exclude rocksdb_num_get_for_update_calls because it's a debug only status var
--echo #
--replace_column 2 #
-show status like 'rocksdb%';
+show status where variable_name like 'rocksdb%' and variable_name not like '%num_get_for_update%';
-select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%';
+select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
--echo # RocksDB-SE's status variables are global internally
--echo # but they are shown as both session and global, like InnoDB's status vars.
-select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%';
+select VARIABLE_NAME from INFORMATION_SCHEMA.session_status where VARIABLE_NAME LIKE 'rocksdb%' and VARIABLE_NAME NOT LIKE '%num_get_for_update%';
--echo #
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc
new file mode 100644
index 00000000000..55f466a4d31
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc
@@ -0,0 +1,106 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+--source include/count_sessions.inc
+
+# Usage:
+#
+# let $order = ASC; # or DESC
+# let $comment = "rev:cf2"; # or ""
+# --source suite/rocksdb/t/rocksdb_concurrent_delete.inc
+
+let $first_row = -1; # Error this should never happen
+if ($order == 'ASC')
+{
+ let $first_row = 1;
+ let $middle_row = 3;
+ let $end_row = 5;
+}
+if ($order == 'DESC')
+{
+ let $first_row = 5;
+ let $middle_row = 3;
+ let $end_row = 1;
+}
+
+connect (con, localhost, root,,);
+connection default;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+
+SET debug_sync='RESET';
+
+eval CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT $comment, a INT);
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+
+# This will cause the SELECT to block after finding the first row, but
+# before locking and reading it.
+--echo --PK first row delete
+connection con;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE;
+
+# While that connection is waiting, delete the first row (the one con
+# is about to lock and read)
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = $first_row;
+
+# Signal the waiting select to continue
+SET debug_sync='now SIGNAL go';
+
+# Now get the results from the select. The first entry (1,1) (or (5,5) when
+# using reverse ordering) should be missing. Prior to the fix the SELECT
+# would have returned: "1815: Internal error: NotFound:"
+connection con;
+reap;
+
+# Deleting a middle row
+--echo --PK middle row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = $middle_row;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+# Deleting the end row
+--echo --PK end row delete
+SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go';
+send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = $end_row;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+
+# Cleanup
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test
index 52f9485e6b7..47818bfdbe1 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test
@@ -1,24 +1,38 @@
+# rocksdb_concurrent_delete test case covers snapshot conflicts, and
+# verifying locking reads not stopping scanning when hitting row not found.
+# The following code coverages are covered.
+#
+# 1. PK full scan (key=NULL)
+# first row, and other rows
+# ha_rnd_next -> rnd_next -> rnd_next_with_direction
+#
+# 2. PK range scan (key=PRIMARY)
+# first row
+# read_range_first -> index_read_map_impl -> read_row_from_primary_key
+# next row
+# index_next -> index_next_with_direction -> rnd_next_with_direction
+#
+# 3. SK full scan
+# first row
+# index_first -> index_first_intern -> index_next_with_direction -> secondary_index_read
+# next row
+# index_next -> index_next_with_direction -> secondary_index_read
+#
+# 4. SK range scan
+# first row
+# read_range_first -> index_read_map_impl -> read_row_from_secondary_key
+# next row
+# index_next -> index_next_with_direction -> secondary_index_read
+#
+# In all cases, RR gets snapshot conflict errors if non-first rows get
+# deleted by another transaction after scanning.
+
--source include/have_rocksdb.inc
--source include/have_debug_sync.inc
-# This validates the fix for Issue #144. The problem was that with more
-# than one client accessing/deleting the same row there was a possibility
-# of client A finding a row (through Next() or Prev()) but the row being
-# deleted before the GetForUpdate() call could occur. When this happened
-# a nearly useless error was being returned.
-
-let $order=ASC;
-let $comment="";
---source include/rocksdb_concurrent_delete.inc
-
-let $order=DESC;
-let $comment="";
---source include/rocksdb_concurrent_delete.inc
+let $isolation_level = REPEATABLE READ;
+--source rocksdb_concurrent_delete_main.inc
-let $order=ASC;
-let $comment="rev:cf2";
---source include/rocksdb_concurrent_delete.inc
+let $isolation_level = READ COMMITTED;
+--source rocksdb_concurrent_delete_main.inc
-let $order=DESC;
-let $comment="rev:cf2";
---source include/rocksdb_concurrent_delete.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc
new file mode 100644
index 00000000000..bcd86af96aa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc
@@ -0,0 +1,30 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+# This validates the fix for Issue #144. The problem was that with more
+# than one client accessing/deleting the same row there was a possibility
+# of client A finding a row (through Next() or Prev()) but the row being
+# deleted before the GetForUpdate() call could occur. When this happened
+# a nearly useless error was being returned.
+
+let $order=ASC;
+let $comment="";
+--source rocksdb_concurrent_delete.inc
+--source rocksdb_concurrent_delete_sk.inc
+
+let $order=DESC;
+let $comment="";
+--source rocksdb_concurrent_delete.inc
+
+let $order=ASC;
+let $comment="rev:cf2";
+--source rocksdb_concurrent_delete.inc
+
+let $order=DESC;
+let $comment="rev:cf2";
+--source rocksdb_concurrent_delete.inc
+
+let $index=PRIMARY;
+--source rocksdb_concurrent_delete_range.inc
+let $index=sk;
+--source rocksdb_concurrent_delete_range.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc
new file mode 100644
index 00000000000..a85527141f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc
@@ -0,0 +1,85 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+--source include/count_sessions.inc
+
+# This is a test case to reproduce https://github.com/facebook/mysql-5.6/issues/162
+# Expected output of the last select for update was (1,2,100) and (1,3,100), but
+# currently it returns (1,2,1) and (1,3,1), which must be fixed.
+
+connect (con, localhost, root,,);
+connection default;
+
+set debug_sync='RESET';
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb;
+insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2);
+
+# deleting a first row
+--echo --First row delete with $index
+connection con;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+send_eval update t1 force index ($index) set value=100 where id1=1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=1;
+set debug_sync='now SIGNAL go';
+
+connection con;
+reap;
+select * from t1 where id1=1;
+
+# deleting a middle row
+--echo --Middle row delete with $index
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+send_eval update t1 force index ($index) set value=200 where id1=1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=3;
+set debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+select * from t1 where id1=1;
+
+# deleting the end row
+--echo --End row delete with $index
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go';
+send_eval update t1 force index ($index) set value=300 where id1=1;
+
+connection default;
+set debug_sync='now WAIT_FOR parked';
+delete from t1 where id1=1 and id2=5;
+set debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+select * from t1 where id1=1;
+
+# Cleanup
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc
new file mode 100644
index 00000000000..ac0b5d76854
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc
@@ -0,0 +1,82 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+--source include/count_sessions.inc
+
+connect (con, localhost, root,,);
+connection default;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+
+SET debug_sync='RESET';
+
+eval CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a));
+INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5);
+
+# This will cause the SELECT to block after finding the first row, but
+# before locking and reading it.
+--echo --SK first row delete
+connection con;
+eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level;
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+
+# While that connection is waiting, delete the first row (the one con
+# is about to lock and read)
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = 1;
+
+# Signal the waiting select to continue
+SET debug_sync='now SIGNAL go';
+
+connection con;
+reap;
+
+# Deleting a middle row
+--echo --SK middle row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = 3;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+# Deleting the end row
+--echo --SK end row delete
+SET debug_sync='rocksdb_concurrent_delete_sk SIGNAL parked WAIT_FOR go';
+send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE;
+
+connection default;
+SET debug_sync='now WAIT_FOR parked';
+eval DELETE FROM t1 WHERE pk = 5;
+SET debug_sync='now SIGNAL go';
+
+connection con;
+if ($isolation_level == "REPEATABLE READ")
+{
+ --error ER_LOCK_DEADLOCK
+ reap;
+}
+if ($isolation_level == "READ COMMITTED")
+{
+ reap;
+}
+
+# Cleanup
+connection default;
+disconnect con;
+set debug_sync='RESET';
+drop table t1;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test
index 9a25f39a8e3..ff092773737 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test
@@ -27,8 +27,9 @@ begin;
--connection default
--echo ### Connection default
-let $wait_condition= select 1 from INFORMATION_SCHEMA.PROCESSLIST
- where ID = $ID and STATE = "Waiting for row lock";
+let $wait_condition=
+ select 1 from INFORMATION_SCHEMA.PROCESSLIST
+ where (ID = $ID /* or SRV_ID = $ID */) and STATE = "Waiting for row lock";
--source include/wait_condition.inc
## Waiting for row lock
## select connection_id();
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf
index 13dea1236d8..9ceb0cc0a97 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf
@@ -3,12 +3,14 @@
[mysqld.1]
sync_binlog=0
binlog_format=row
-rocksdb_read_free_rpl_tables="t.*"
+rocksdb_read_free_rpl=PK_SK
slave-exec-mode=strict
+rocksdb_perf_context_level=3
[mysqld.2]
sync_binlog=0
binlog_format=row
-rocksdb_read_free_rpl_tables="t.*"
+rocksdb_read_free_rpl=PK_SK
slave-exec-mode=strict
rocksdb_default_cf_options=write_buffer_size=16k;target_file_size_base=16k
+rocksdb_perf_context_level=3
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test
new file mode 100644
index 00000000000..e1fb9db0b19
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test
@@ -0,0 +1,414 @@
+source include/have_rocksdb.inc;
+source include/master-slave.inc;
+source include/have_debug.inc;
+
+
+connection master;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+# initialization/insert
+connection master;
+create table t1 (id int primary key, value int);
+insert into t1 values (1,1), (2,2), (3,3), (4,4);
+--source include/sync_slave_sql_with_master.inc
+
+--let $diff_tables= master:t1, slave:t1
+
+--echo
+--echo # regular update/delete. With rocks_read_free_rpl=PK_SK, rocksdb_rows_read does not increase on slaves
+--echo
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set value=value+1 where id=1;
+delete from t1 where id=4;
+select * from t1;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+
+--echo
+--echo # "rocks_read_free_rpl=PK_SK" makes "row not found error" not happen anymore
+--echo
+connection slave;
+--source include/stop_slave.inc
+delete from t1 where id in (2, 3);
+--source include/start_slave.inc
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+connection master;
+update t1 set value=value+1 where id=3;
+delete from t1 where id=2;
+select * from t1;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+
+--echo
+--echo ## tables without primary key -- read free replication should be disabled
+--echo
+--echo
+--echo #no index
+--echo
+connection master;
+drop table t1;
+create table t1 (c1 int, c2 int);
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+--echo
+--echo #secondary index only
+--echo
+connection master;
+drop table t1;
+create table t1 (c1 int, c2 int, index i(c1));
+insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set c2=100 where c1=3;
+delete from t1 where c1 <= 2;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+
+
+--echo
+--echo ## large row operations -- primary key modification, secondary key modification
+--echo
+connection master;
+drop table t1;
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+
+--disable_query_log
+let $i=1;
+while ($i<=10000)
+{
+ eval insert t1(id1,id2,c1,c2,c3,c4,c5,c6,c7)
+ values($i,0,$i,0,0,0,0,0,0);
+ inc $i;
+}
+--enable_query_log
+
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+
+--echo
+--echo #updating all secondary keys by 1
+--echo
+--disable_query_log
+let $i=1;
+while ($i<=10000)
+{
+ eval update t1 set c2=c2+1 where id1=$i and id2=0;
+ inc $i;
+}
+--enable_query_log
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #updating all primary keys by 2
+--echo
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--disable_query_log
+let $i=1;
+while ($i<=10000)
+{
+ eval update t1 set id2=id2+2 where id1=$i and id2=0;
+ inc $i;
+}
+--enable_query_log
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #updating secondary keys after truncating t1 on slave
+--echo
+connection slave;
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set c2=c2+10;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #updating primary keys after truncating t1 on slave
+--echo
+connection slave;
+truncate table t1;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+update t1 set id2=id2+10;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo #deleting half rows
+--echo
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+delete from t1 where id1 <= 5000;
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+--source include/diff_tables.inc
+
+--echo
+--echo # rocksdb_read_free_rpl = PK_ONLY i.e. it only works on tables with only PK
+--echo
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+start slave;
+connection master;
+--echo [on master]
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+
+# make changes on the master
+connection master;
+--echo [on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+
+connection slave;
+--echo [on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+# wait until we have the expected error
+--let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
+--source include/wait_for_slave_sql_error.inc
+
+# query the t2 table on the slave
+connection slave;
+select count(*) from t2 force index(primary);
+select * from t2 where id=1;
+select i1 from t2 where i1=1;
+select i2 from t2 where i2=100;
+
+# query the u2 table on the slave
+select count(*) from u2 force index(primary);
+select count(*) from u2 force index(i1);
+select count(*) from u2 force index(i2);
+select * from u2 where id=1;
+select i1 from u2 where i1=1;
+select i2 from u2 where i2=100;
+
+# the slave replication thread stopped because of the errors;
+# cleanup the problem and restart it
+--disable_query_log
+insert into u2 values(1,1,1,1), (2,2,2,2);
+start slave sql_thread;
+--source include/wait_for_slave_sql_to_start.inc
+--enable_query_log
+
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl = PK_SK;
+start slave;
+
+--echo
+--echo # some tables with read-free replication on and some with it off
+--echo
+# We'll set the table filter to all tables starting with 't'
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = "t.*";
+start slave;
+connection master;
+--echo [on master]
+drop table if exists t2;
+drop table if exists u2;
+create table t2 (id int primary key, i1 int, i2 int, value int);
+create table u2 (id int primary key, i1 int, i2 int, value int);
+insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+delete from t2 where id <= 2;
+delete from u2 where id <= 2;
+
+# make changes on the master
+connection master;
+--echo [on master]
+update t2 set i2=100, value=100 where id=1;
+update u2 set i2=100, value=100 where id=1;
+
+connection slave;
+--echo [on slave]
+call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
+call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
+# wait until we have the expected error
+--let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
+--source include/wait_for_slave_sql_error.inc
+
+# query the t2 table on the slave
+connection slave;
+select count(*) from t2 force index(primary);
+select * from t2 where id=1;
+select i1 from t2 where i1=1;
+select i2 from t2 where i2=100;
+
+# query the u2 table on the slave
+select count(*) from u2 force index(primary);
+select * from u2 where id=1;
+select i1 from u2 where i1=1;
+select i2 from u2 where i2=100;
+
+# the slave replication thread stopped because of the errors;
+# cleanup the problem and restart it
+--disable_query_log
+insert into u2 values(1,1,1,1), (2,2,2,2);
+start slave sql_thread;
+--source include/wait_for_slave_sql_to_start.inc
+--enable_query_log
+
+connection slave;
+--echo [on slave]
+stop slave;
+set @@global.rocksdb_read_free_rpl_tables = ".*";
+start slave;
+
+--echo
+--echo # secondary keys lose rows
+--echo
+connection master;
+--echo [on master]
+create table t3 (id int primary key, i1 int, i2 int, value int, index(i1),
+index(i2));
+insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+delete from t3 where id <= 2;
+
+# make changes on the master
+connection master;
+--echo [on master]
+update t3 set i2=100, value=100 where id=1;
+
+# make sure the slave is caught up
+--source include/sync_slave_sql_with_master.inc
+
+# query the t3 table on the slave
+connection slave;
+select count(*) from t3 force index(primary);
+select count(*) from t3 force index(i1);
+select count(*) from t3 force index(i2);
+select * from t3 where id=1;
+select i1 from t3 where i1=1;
+select i2 from t3 where i2=100;
+
+--echo
+--echo # secondary keys have extra rows
+--echo
+connection master;
+--echo [on master]
+create table t4 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
+insert into t4 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+--source include/sync_slave_sql_with_master.inc
+
+# make a mismatch between the slave and the master
+connection slave;
+--echo [on slave]
+update t4 set i1=100 where id=1;
+
+# make changes on the master
+connection master;
+--echo [on master]
+delete from t4 where id=1;
+
+# make sure the slave is caught up
+--source include/sync_slave_sql_with_master.inc
+
+# query the t4 table on the slave
+connection slave;
+--echo [on slave]
+select count(*) from t4 force index(primary);
+select count(*) from t4 force index(i1);
+select count(*) from t4 force index(i2);
+select i1 from t4 where i1=100;
+
+--echo
+--echo # inserts are also read-free
+--echo
+connection master;
+--echo [on master]
+drop table if exists t2;
+drop table if exists t3;
+create table t2 (id int primary key, i1 int, i2 int);
+create table t3 (id int primary key, i1 int, i2 int, key(i1));
+connection slave;
+select variable_value into @up from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+connection master;
+insert into t2 values(1, 1, 1);
+insert into t2 values(2, 2, 2);
+insert into t3 values(1, 1, 1);
+insert into t3 values(2, 2, 2);
+--source include/sync_slave_sql_with_master.inc
+connection slave;
+select case when variable_value-@up > 0 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t2;
+select * from t3;
+
+# cleanup
+connection master;
+drop table t1, t2, t3, t4, u2;
+
+--source include/rpl_end.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf
new file mode 100644
index 00000000000..f225d5dd71b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf
@@ -0,0 +1,17 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+sync_binlog=0
+binlog_format=row
+rocksdb_perf_context_level=3
+
+[mysqld.2]
+sync_binlog=0
+binlog_format=row
+slave-exec-mode=strict
+rocksdb_perf_context_level=3
+slave_use_idempotent_for_recovery=YES
+slave_parallel_workers=8
+mts_dependency_replication=STMT
+mts_dependency_order_commits=0
+slave_tx_isolation=READ-COMMITTED
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc
new file mode 100644
index 00000000000..e69bcce72d8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc
@@ -0,0 +1,69 @@
+
+disable_query_log;
+
+# Create a schema with different kinds of tables (with different kinds of keys)
+connection master;
+create table t1(a int, b int, c int, d int); #no keys
+create table t2(a int primary key, b int, c int, d int); #only pk
+create table t3(a int, b int, c int, d int, key(b)); #only sk
+create table t4(a int, b int unique, c int, d int); #only unique sk
+create table t5(a int primary key, b int, c int, d int, key(b)); #pk + sk
+create table t6(a int primary key, b int unique, c int, d int); #pk + unique sk
+create table t7(a int, b int unique, c int, d int, key(c)); #sk + unique sk
+create table t8(a int primary key, b int unique, c int, d int, key(c)); #pk + sk + unique sk
+
+# Insert a bunch of rows
+let $iter = 0;
+while ($iter < 1000) {
+ let $t = 1;
+ while ($t <= 8) {
+ eval insert into t$t values($iter, $iter, $iter, $iter);
+ inc $t;
+ }
+ inc $iter;
+}
+
+let $iter = 0;
+while ($iter < 10) {
+ let $t = 1;
+ while ($t <= 8) {
+ eval update t$t set a = a + 10000 where a > 900; # update pk (if any)
+ eval update t$t set b = b + 10000 where b > 900; # update sk or unique (if any)
+ eval update t$t set c = c + 10000 where c > 900; # update sk or unique(if any)
+ eval update t$t set d = d + 10000 where d > 900; # update non key col
+
+ eval delete from t$t where a < 25;
+ eval delete from t$t where b < 50;
+ eval delete from t$t where c < 75;
+ eval delete from t$t where d < 100;
+
+ # Re-insert the deleted rows
+ let $i = 0;
+ while ($i < 100) {
+ eval insert into t$t values($i, $i, $i, $i);
+ inc $i;
+ }
+ inc $t;
+ }
+ inc $iter;
+}
+source include/sync_slave_sql_with_master.inc;
+
+connection master;
+let $t = 1;
+while ($t <= 8) {
+ let $diff_tables = master:t$t, slave:t$t;
+ source include/diff_tables.inc;
+ inc $t;
+}
+
+# Cleanup
+connection master;
+let $t = 1;
+while ($t <= 8) {
+ eval drop table t$t;
+ inc $t;
+}
+source include/sync_slave_sql_with_master.inc;
+
+enable_query_log;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test
new file mode 100644
index 00000000000..31e65db8d5d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test
@@ -0,0 +1,22 @@
+source include/have_rocksdb.inc;
+source include/master-slave.inc;
+source include/not_valgrind.inc;
+
+connection slave;
+source include/stop_slave.inc;
+set @@global.rocksdb_read_free_rpl = PK_SK;
+source include/start_slave.inc;
+source rocksdb_read_free_rpl_stress.inc;
+
+connection slave;
+source include/stop_slave.inc;
+set @@global.rocksdb_read_free_rpl = PK_ONLY;
+source include/start_slave.inc;
+source rocksdb_read_free_rpl_stress.inc;
+
+connection slave;
+source include/stop_slave.inc;
+set @@global.rocksdb_read_free_rpl = default;
+source include/start_slave.inc;
+
+source include/rpl_end.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt
new file mode 100644
index 00000000000..8d8ae3d65f3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt
@@ -0,0 +1 @@
+--rocksdb_lock_wait_timeout=2
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test
new file mode 100644
index 00000000000..d47af90d842
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test
@@ -0,0 +1,78 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+
+SET @@global.rocksdb_rollback_on_timeout = 1;
+show variables like 'rocksdb_rollback_on_timeout';
+
+create table t1 (a int unsigned not null primary key) engine = rocksdb;
+insert into t1 values (1);
+commit;
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con2;
+begin work;
+insert into t1 values (5);
+insert into t1 values (6);
+
+update t1 set a = a + 1 where a = 1;
+
+connection con1;
+begin work;
+insert into t1 values (7);
+insert into t1 values (8);
+
+# This statement will time out. The whole transaction will be
+# rolled back. So values 7 and 8 are not inserted.
+--error ER_LOCK_WAIT_TIMEOUT
+update t1 set a = a + 1 where a = 1;
+
+select * from t1;
+commit;
+
+connection con2;
+select * from t1;
+commit;
+
+connection default;
+select * from t1;
+
+SET @@global.rocksdb_rollback_on_timeout = 0;
+show variables like 'rocksdb_rollback_on_timeout';
+
+connection con2;
+begin work;
+insert into t1 values (9);
+insert into t1 values (10);
+
+update t1 set a = a + 1 where a = 2;
+
+connection con1;
+begin work;
+insert into t1 values (11);
+insert into t1 values (12);
+
+# This statement will time out. Only this statement will be
+# rolled back. So values 11 and 12 are inserted.
+--error ER_LOCK_WAIT_TIMEOUT
+update t1 set a = a + 1 where a = 2;
+
+select * from t1;
+commit;
+
+connection con2;
+select * from t1;
+commit;
+
+connection default;
+select * from t1;
+
+SET @@global.rocksdb_rollback_on_timeout = DEFAULT;
+
+drop table t1;
+disconnect con1;
+disconnect con2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.test
deleted file mode 100644
index 38fb3c32149..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_read_free.test
+++ /dev/null
@@ -1,302 +0,0 @@
---source include/have_rocksdb.inc
-
-source include/master-slave.inc;
-
-
-connection master;
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-# initialization/insert
-connection master;
---source init_stats_procedure.inc
-
-create table t1 (id int primary key, value int);
-insert into t1 values (1,1), (2,2), (3,3), (4,4);
---source include/sync_slave_sql_with_master.inc
-
---let $diff_tables= master:t1, slave:t1
-
---echo
---echo # regular update/delete. With rocks_read_free_rpl_tables=.*, rocksdb_rows_read does not increase on slaves
---echo
-connection slave;
-call save_read_stats();
-connection master;
-update t1 set value=value+1 where id=1;
-delete from t1 where id=4;
-select * from t1;
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-select * from t1;
-
-
---echo
---echo # "rocks_read_free_rpl_tables=.*" makes "row not found error" not happen anymore
---echo
-connection slave;
---source include/stop_slave.inc
-delete from t1 where id in (2, 3);
---source include/start_slave.inc
-call save_read_stats();
-
-connection master;
-update t1 set value=value+1 where id=3;
-delete from t1 where id=2;
-select * from t1;
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-select * from t1;
-
-
---echo
---echo ## tables without primary key -- read free replication should be disabled
---echo
---echo
---echo #no index
---echo
-connection master;
-drop table t1;
-create table t1 (c1 int, c2 int);
-insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call save_read_stats();
-connection master;
-update t1 set c2=100 where c1=3;
-delete from t1 where c1 <= 2;
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-select * from t1;
-
---echo
---echo #secondary index only
---echo
-connection master;
-drop table t1;
-create table t1 (c1 int, c2 int, index i(c1));
-insert into t1 values (1,1), (2,2),(3,3),(4,4),(5,5);
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call save_read_stats();
-connection master;
-update t1 set c2=100 where c1=3;
-delete from t1 where c1 <= 2;
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-select * from t1;
-
-
-
---echo
---echo ## large row operations -- primary key modification, secondary key modification
---echo
-connection master;
-drop table t1;
-create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
-
---disable_query_log
-let $i=1;
-while ($i<=10000)
-{
- eval insert t1(id1,id2,c1,c2,c3,c4,c5,c6,c7)
- values($i,0,$i,0,0,0,0,0,0);
- inc $i;
-}
---enable_query_log
-
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call save_read_stats();
-connection master;
-
---echo
---echo #updating all seconary keys by 1
---echo
---disable_query_log
-let $i=1;
-while ($i<=10000)
-{
- eval update t1 set c2=c2+1 where id1=$i and id2=0;
- inc $i;
-}
---enable_query_log
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-connection master;
---source include/diff_tables.inc
-
---echo
---echo #updating all primary keys by 2
---echo
-connection slave;
-call save_read_stats();
-connection master;
---disable_query_log
-let $i=1;
-while ($i<=10000)
-{
- eval update t1 set id2=id2+2 where id1=$i and id2=0;
- inc $i;
-}
---enable_query_log
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-connection master;
---source include/diff_tables.inc
-
---echo
---echo #updating secondary keys after truncating t1 on slave
---echo
-connection slave;
-truncate table t1;
-call save_read_stats();
-connection master;
-update t1 set c2=c2+10;
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-connection master;
---source include/diff_tables.inc
-
---echo
---echo #updating primary keys after truncating t1 on slave
---echo
-connection slave;
-truncate table t1;
-call save_read_stats();
-connection master;
-update t1 set id2=id2+10;
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-connection master;
---source include/diff_tables.inc
-
---echo
---echo #deleting half rows
---echo
-connection slave;
-call save_read_stats();
-connection master;
-delete from t1 where id1 <= 5000;
---source include/sync_slave_sql_with_master.inc
-connection slave;
-call get_read_stats();
-connection master;
---source include/diff_tables.inc
-
-#--echo
-#--echo # some tables with read-free replication on and some with it off
-#--echo # secondary keys lose rows
-#--echo
-# The configuration is set up so the slave will do read-free replication on
-# all tables starting with 't'
-connection master;
---echo [on master]
-create table t2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-create table u2 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-insert into t2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
-insert into u2 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
---source include/sync_slave_sql_with_master.inc
-
-# make a mismatch between the slave and the master
-connection slave;
---echo [on slave]
-delete from t2 where id <= 2;
-delete from u2 where id <= 2;
-
-# make changes on the master
-connection master;
---echo [on master]
-update t2 set i2=100, value=100 where id=1;
-update u2 set i2=100, value=100 where id=1;
-
-connection slave;
---echo [on slave]
-call mtr.add_suppression("Slave SQL.*Could not execute Update_rows event on table test.u2.*Error_code.*");
-call mtr.add_suppression("Slave: Can't find record in 'u2'.*");
-# wait until we have the expected error
---let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
---source include/wait_for_slave_sql_error.inc
-
-# query the t2 table on the slave
-connection slave;
-select count(*) from t2 force index(primary);
-select count(*) from t2 force index(i1);
-select count(*) from t2 force index(i2);
-select * from t2 where id=1;
-select i1 from t2 where i1=1;
-select i2 from t2 where i2=100;
-
-# query the u2 table on the slave
-select count(*) from u2 force index(primary);
-select count(*) from u2 force index(i1);
-select count(*) from u2 force index(i2);
-select * from u2 where id=1;
-select i1 from u2 where i1=1;
-select i2 from u2 where i2=100;
-
-# the slave replication thread stopped because of the errors;
-# cleanup the problem and restart it
---disable_query_log
-insert into u2 values(1,1,1,1), (2,2,2,2);
-start slave sql_thread;
---source include/wait_for_slave_sql_to_start.inc
---enable_query_log
-
---echo
---echo # some tables with read-free replication on and some with it off
---echo # secondary keys have extra rows
---echo
-connection master;
---echo [on master]
-create table t3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-create table u3 (id int primary key, i1 int, i2 int, value int, index(i1), index(i2));
-insert into t3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
-insert into u3 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
---source include/sync_slave_sql_with_master.inc
-
-# make a mismatch between the slave and the master
-connection slave;
---echo [on slave]
-update t3 set i1=100 where id=1;
-update u3 set i1=100 where id=1;
-
-# make changes on the master
-connection master;
---echo [on master]
-delete from t3 where id=1;
-delete from u3 where id=1;
-
-# make sure the slave is caught up
---source include/sync_slave_sql_with_master.inc
-
-# query the t3 table on the slave
-connection slave;
---echo [on slave]
-select count(*) from t3 force index(primary);
-select count(*) from t3 force index(i1);
-select count(*) from t3 force index(i2);
-select i1 from t3 where i1=100;
-
-# query the u3 table on the slave
-select count(*) from u3 force index(primary);
-select count(*) from u3 force index(i1);
-select count(*) from u3 force index(i2);
-select i1 from u3 where i1=100;
-
-# cleanup
-connection master;
-drop table t1, t2, t3, u2, u3;
---source drop_stats_procedure.inc
-
---source include/rpl_end.inc
-
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf
new file mode 100644
index 00000000000..110d18abac7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+binlog_format=row
+transaction_isolation=read-committed
+[mysqld.2]
+binlog_format=row
+slave_parallel_workers=4
+slave_exec_mode=SEMI_STRICT
+rocksdb_lock_wait_timeout=5
+transaction_isolation=read-committed
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test
new file mode 100644
index 00000000000..36188427585
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test
@@ -0,0 +1,4 @@
+--source include/have_binlog_format_row.inc
+
+--source rpl_row_not_found.inc
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf
index d20d3396f0a..b0a37fd30ad 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf
@@ -6,7 +6,7 @@ gtid_mode=ON
enforce_gtid_consistency
log_slave_updates
binlog_row_image=FULL
-rocksdb_read_free_rpl_tables=.*
+rocksdb_read_free_rpl=PK_SK
rocksdb_strict_collation_check=0
[mysqld.2]
binlog_format=row
@@ -14,6 +14,6 @@ gtid_mode=ON
enforce_gtid_consistency
log_slave_updates
binlog_row_image=FULL
-rocksdb_read_free_rpl_tables=.*
+rocksdb_read_free_rpl=PK_SK
rocksdb_strict_collation_check=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test
index 4eb02ac648a..80bae00424b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test
@@ -1,4 +1,8 @@
--source include/have_rocksdb.inc
+# Don't run the RQG tests with --rpc_protocol because the connections and
+# queries will be coming from Perl where we don't have any ability to
+# specify the query attributes needed for the RPC protocol.
+--source include/not_rpc_protocol.inc
# RQG's examples test
let $TESTDIR = examples;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
index 16d978c71b7..2e560c86c62 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
@@ -1,4 +1,8 @@
--source include/have_rocksdb.inc
+# Don't run the RQG tests with --rpc_protocol because the connections and
+# queries will be coming from Perl where we don't have any ability to
+# specify the query attributes needed for the RPC protocol.
+--source include/not_rpc_protocol.inc
call mtr.add_suppression("Did not write failed ");
call mtr.add_suppression("Can't open and lock privilege tables");
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test
index f29ddcb8c81..383b9aed39f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test
@@ -1,4 +1,8 @@
--source include/have_rocksdb.inc
+# Don't run the RQG tests with --rpc_protocol because the connections and
+# queries will be coming from Perl where we don't have any ability to
+# specify the query attributes needed for the RPC protocol.
+--source include/not_rpc_protocol.inc
call mtr.add_suppression("Deadlock found when trying to get lock");
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt
new file mode 100644
index 00000000000..c07b063f07c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=off --binlog_format=row --log-bin
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test
new file mode 100644
index 00000000000..451eed057ac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test
@@ -0,0 +1,53 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+--source include/not_valgrind.inc
+
+--enable_connect_log
+--enable_info
+
+# setup search pattern and file (new log error file)
+--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/skip_core_dump_on_error.err
+
+# restart the server with the custom error log file
+--let $_mysqld_option=--log-error=$SEARCH_FILE --default-storage-engine=rocksdb
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--source include/restart_mysqld_with_option.inc
+
+# setup
+create table mz(c int);
+insert into mz values(1);
+commit;
+
+# simulate a write error
+SET debug= '+d,abort_with_io_write_error';
+
+# we want to abort server if we fail to write (ABORT_SERVER)
+set global binlog_error_action=1;
+
+# display the values of the key parameters
+show session variables like 'debug';
+show global variables like 'binlog_error_action';
+show global variables like 'skip_core_dump_on_error';
+
+--echo # crash_during_update
+# tell client that crash is expected
+--error 1598
+# run an update to trigger a write error
+update mz set c=13;
+
+# should find server abort (prints: Pattern "..." found)
+--echo # server aborted
+--let SEARCH_PATTERN=mysqld got signal 6
+--source include/search_pattern.inc
+
+# should not find a core dump (prints: Pattern "..." not found)
+--echo # but no core written
+--let SEARCH_PATTERN=Writing a core file
+--source include/search_pattern.inc
+
+--let $_mysqld_option=
+--source include/start_mysqld.inc
+--remove_file $SEARCH_FILE
+
+# tidy up
+drop table mz;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test
index 99cb2253d94..a7be5c9a7ac 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test
@@ -34,6 +34,7 @@ CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY) ENGINE=rocksdb PARTITION BY RANGE
DATA DIRECTORY = '/foo/bar/data/',
PARTITION P2 VALUES LESS THAN (MAXVALUE)
);
+show warnings;
--error ER_CANT_CREATE_TABLE
CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id)
@@ -44,3 +45,4 @@ CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE
INDEX DIRECTORY = '/foo/bar/data/',
PARTITION P2 VALUES LESS THAN (MAXVALUE)
);
+show warnings;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc
new file mode 100644
index 00000000000..2193aa9f1de
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc
@@ -0,0 +1,102 @@
+
+# Truncate table multiple times
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+TRUNCATE TABLE t1;
+
+# Truncate partition multiple times
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+
+# TRUNCATE multiple times
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+
+# TRUNCATE multiple times
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+
+# TRUNCATE different partition
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+TRUNCATE TABLE t1;
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+
+# Insert value once and truncate multiple times
+INSERT INTO t1 (a,b) VALUES (1, 1), (2, 4), (3, 8);
+SELECT a,b FROM t1 ORDER BY a;
+
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+SELECT a,b FROM t1 ORDER BY a;
+SELECT a FROM t1 WHERE b > 2;
+SELECT b from t1 where a != 3;
+
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+SELECT a,b FROM t1 ORDER BY b;
+SELECT a FROM t1 WHERE b > 2;
+SELECT b from t1 where a != 3;
+
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+SELECT a,b FROM t1 ORDER BY a;
+
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+
+# Insert value multiple times and truncate multiple times
+INSERT INTO t1 (a,b) VALUES (4, 1), (5, 4), (6, 8);
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT a FROM t1 WHERE b < 5;
+
+ALTER TABLE t1 TRUNCATE PARTITION p2;
+--sorted_result
+SELECT a,b FROM t1;
+INSERT INTO t1(a,b) VALUES(7, 1);
+--sorted_result
+SELECT b from t1 WHERE a > 2;
+--sorted_result
+SELECT a,b FROM t1;
+
+ALTER TABLE t1 TRUNCATE PARTITION p1;
+--sorted_result
+SELECT a,b FROM t1;
+INSERT INTO t1(a,b) VALUES(8, 4);
+--sorted_result
+SELECT a,b FROM t1;
+--sorted_result
+SELECT b from t1 WHERE a < 9;
+
+ALTER TABLE t1 TRUNCATE PARTITION p0;
+--sorted_result
+SELECT a,b FROM t1;
+INSERT INTO t1(a,b) VALUES(9, 8);
+
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+
+TRUNCATE TABLE t1;
+SELECT a,b FROM t1;
+
+# manual compaction
+set global rocksdb_force_flush_memtable_now = true;
+set global rocksdb_compact_cf = 'default';
+
+--disable_query_log
+let $i = 0;
+while($i < 9)
+{
+ inc $i;
+ eval insert t1 values($i, $i);
+}
+--enable_query_log
+--sorted_result
+SELECT b FROM t1 WHERE a < 5;
+
+TRUNCATE TABLE t1;
+SELECT b FROM t1 WHERE a < 5;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test
new file mode 100644
index 00000000000..f9a89517e2a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test
@@ -0,0 +1,83 @@
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# TRUNCATE PARTITION
+#
+
+
+# Hidden Key table
+--echo #
+--echo # table(hidden key)
+--echo #
+CREATE TABLE t1 (
+ a INT,
+ b INT
+) ENGINE=ROCKSDB
+PARTITION BY RANGE (b) (
+ PARTITION p0 VALUES LESS THAN (3),
+ PARTITION p1 VALUES LESS THAN (6),
+ PARTITION p2 VALUES LESS THAN MAXVALUE
+);
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(secondary key)
+--echo #
+CREATE TABLE t1(
+ a INT,
+ b INT,
+ KEY (b)
+) ENGINE=ROCKSDB
+PARTITION BY HASH(a) PARTITIONS 3;
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(primary key, auto increment)
+--echo #
+CREATE TABLE t1(
+ a INT NOT NULL AUTO_INCREMENT,
+ b INT,
+ PRIMARY KEY(a)
+) ENGINE=ROCKSDB
+PARTITION BY KEY() PARTITIONS 3;
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(cf)
+--echo #
+CREATE TABLE t1 (
+ a INT,
+ b INT,
+ PRIMARY KEY (`a`, `b`) COMMENT 'testcomment'
+) ENGINE=ROCKSDB
+ PARTITION BY LIST(a) (
+ PARTITION p0 VALUES IN (1, 4, 7),
+ PARTITION p1 VALUES IN (2, 5, 8),
+ PARTITION p2 VALUES IN (3, 6, 9)
+);
+
+--source truncate_partition.inc
+
+
+--echo #
+--echo # table(reverse cf)
+--echo #
+CREATE TABLE t1 (
+ a INT,
+ b INT,
+ PRIMARY KEY (`a`, `b`) COMMENT 'p0_cfname=rev:foo;p1_cfname=bar;p2_cfname=baz'
+) ENGINE=ROCKSDB
+PARTITION BY LIST(a) (
+ PARTITION p0 VALUES IN (1, 4, 7),
+ PARTITION p1 VALUES IN (2, 5, 8),
+ PARTITION p2 VALUES IN (3, 6, 9)
+);
+
+--source truncate_partition.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test
new file mode 100644
index 00000000000..f9e871ff1c8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test
@@ -0,0 +1,56 @@
+--source include/have_rocksdb.inc
+--source include/have_debug_sync.inc
+
+####
+# Bump rows_examined count whenever MyRocks filters out a row due to expired TTL
+####
+
+# clean start
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = -10;
+
+--enable_connect_log
+--enable_info
+
+connect (conn1, localhost, root,,test);
+connect (conn2, localhost, root,,test);
+
+connection conn1;
+
+# create table with TTL policy (1s)
+CREATE TABLE t_re (
+ a INT, b INT, PRIMARY KEY (a)
+) ENGINE=ROCKSDB
+COMMENT 'ttl_duration=1';
+
+# start with 2 rows, expired at the insertion time
+set global rocksdb_debug_ttl_rec_ts = -13;
+insert into t_re values (1,1);
+insert into t_re values (2,2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+commit;
+
+# setup signal to stop in code where we skip expired records
+set debug_sync='rocksdb.ttl_rows_examined SIGNAL parked WAIT_FOR go';
+send SELECT * FROM t_re;
+
+connection conn2;
+set debug_sync='now WAIT_FOR parked';
+
+# display "Rows Examined" before returning from call
+--replace_column 1 ### 2 ### 3 ### 4 ### 6 ### 10 ### 11 ### 12 ###
+--sorted_result
+SHOW PROCESSLIST;
+
+set debug_sync='now SIGNAL go';
+
+connection conn1;
+reap;
+
+# tidy up
+set debug_sync='RESET';
+set global rocksdb_debug_ttl_read_filter_ts = DEFAULT;
+drop table t_re;
+
+disconnect conn1;
+disconnect conn2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test b/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test
index 86ae15924cb..26f34f86f73 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/unique_check.test
@@ -30,7 +30,8 @@ send insert into t1 values (1,2);
connection con1;
let $wait_condition= select 1 from INFORMATION_SCHEMA.PROCESSLIST
- where ID = $ID and STATE = "Waiting for row lock";
+ where (ID = $ID /* or SRV_ID = $ID*/)
+ and STATE = "Waiting for row lock";
--source include/wait_condition.inc
commit;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test
new file mode 100644
index 00000000000..782e2a369a4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test
@@ -0,0 +1,5 @@
+--source include/have_rocksdb.inc
+
+--let $io_option=--rocksdb_use_direct_io_for_flush_and_compaction=1
+
+--source ../include/use_direct_io_option.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test
index c347a85518f..a1b717e85fc 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test
@@ -1,37 +1,5 @@
--source include/have_rocksdb.inc
---perl
-use Cwd 'abs_path';
-
-open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/data_in_shm.inc") or die;
-my $real_path= abs_path($ENV{'MYSQLTEST_VARDIR'});
-my $in_shm= index($real_path, "/dev/shm") != -1;
-print FILE "let \$DATA_IN_SHM= $in_shm;\n";
-close FILE;
-EOF
-
---source $MYSQL_TMP_DIR/data_in_shm.inc
---remove_file $MYSQL_TMP_DIR/data_in_shm.inc
-
-if ($DATA_IN_SHM)
-{
- --skip DATADIR is in /dev/shm, possibly due to --mem
-}
-
---echo Checking direct reads
---let $_mysqld_option=--rocksdb_use_direct_reads=1
---source include/restart_mysqld_with_option.inc
-
-CREATE TABLE t1 (pk INT PRIMARY KEY DEFAULT '0', a INT(11), b CHAR(8)) ENGINE=rocksdb;
-SHOW CREATE TABLE t1;
-INSERT INTO t1 VALUES (1, 1,'a');
-INSERT INTO t1 (a,b) VALUES (2,'b');
-set global rocksdb_force_flush_memtable_now=1;
---sorted_result
-SELECT a,b FROM t1;
-DROP TABLE t1;
-
-# cleanup
---let _$mysqld_option=
---source include/restart_mysqld.inc
+--let $io_option=--rocksdb_use_direct_reads=1
+--source ../include/use_direct_io_option.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
index 8dfbe312ea8..53ba5161d16 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
@@ -1,4 +1,5 @@
--source include/have_rocksdb.inc
+--source include/have_direct_io.inc
call mtr.add_suppression("rocksdb");
call mtr.add_suppression("Aborting");
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh
new file mode 100755
index 00000000000..98a1fecceba
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+COPY_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_copy_log"
+SIGNAL_FILE=${MYSQL_TMP_DIR}/myrocks_hotbackup_signal
+MOVEBACK_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_moveback_log"
+rm -f $COPY_LOG
+rm -f $SIGNAL_FILE
+rm -f $MOVEBACK_LOG
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
index b6735abb0a9..6108cfbb1aa 100755
--- a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
@@ -1,5 +1,7 @@
#!/bin/bash
+. suite/rocksdb_hotbackup/include/clean_tmpfiles.sh
+
if [ "$STREAM_TYPE" == 'wdt' ]; then
which wdt >/dev/null 2>&1
if [ $? -ne 0 ]; then
@@ -31,11 +33,8 @@ rm -rf $backup_dir/*
rm -rf $dest_data_dir/
mkdir $dest_data_dir
-COPY_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_copy_log"
+
SIGNAL_CONDITION=""
-SIGNAL_FILE=${MYSQL_TMP_DIR}/myrocks_hotbackup_signal
-rm -f $COPY_LOG
-rm -f $SIGNAL_FILE
if [ "$FRM" == '1' ]; then
suite/rocksdb_hotbackup/include/create_table.sh $COPY_LOG $SIGNAL_FILE 2>&1 &
@@ -49,23 +48,23 @@ if [ "$STREAM_TYPE" == 'tar' ]; then
BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \
--stream=tar --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
$COPY_LOG | tar -xi -C $backup_dir"
-elif [ "$STREAM_TYPE" == 'xbstream' ]; then
- BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \
- --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
- $COPY_LOG | xbstream -x \
- --directory=$backup_dir"
-elif [ "$STREAM_TYPE" == "xbstream_socket" ]; then
- BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --socket=${MASTER_MYSOCK} \
- --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
- $COPY_LOG | xbstream -x \
- --directory=$backup_dir"
-else
+elif [ "$STREAM_TYPE" == 'wdt' ]; then
BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --stream=wdt \
--port=${MASTER_MYPORT} --destination=localhost --backup_dir=$backup_dir \
--avg_mbytes_per_sec=10 --interval=5 $SIGNAL_CONDITION \
--extra_wdt_sender_options='--block_size_mbytes=1' \
--checkpoint_dir=$checkpoint_dir 2> \
$COPY_LOG"
+elif [ "$STREAM_TYPE" == "xbstream_socket" ]; then
+ BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --socket=${MASTER_MYSOCK} \
+ --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
+ $COPY_LOG | xbstream -x \
+ --directory=$backup_dir"
+else
+ BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \
+ --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
+ $COPY_LOG | xbstream -x \
+ --directory=$backup_dir"
fi
echo "myrocks_hotbackup copy phase"
@@ -73,7 +72,6 @@ eval "$BACKUP_CMD"
mkdir ${backup_dir}/test # TODO: Fix skipping empty directories
-MOVEBACK_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_moveback_log"
echo "myrocks_hotbackup move-back phase"
$MYSQL_MYROCKS_HOTBACKUP --move_back --datadir=$dest_data_dir \
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result
new file mode 100644
index 00000000000..31ed2677444
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result
@@ -0,0 +1,21 @@
+include/rpl_init.inc [topology=none]
+include/rpl_default_connections.inc
+create database db1;
+create table db1.t1 (
+`id` int(10) not null auto_increment,
+`k` int(10),
+`data` varchar(2048),
+primary key (`id`),
+key (`k`)
+) engine=rocksdb;
+include/rpl_stop_server.inc [server_number=2]
+myrocks_hotbackup copy phase
+myrocks_hotbackup copy phase
+myrocks_hotbackup move-back phase
+include/rpl_start_server.inc [server_number=2]
+select count(*) from db1.t1;
+count(*)
+250000
+drop database db1;
+drop database db1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc
new file mode 100644
index 00000000000..52456a68140
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc
@@ -0,0 +1,25 @@
+
+source suite/rocksdb_hotbackup/include/setup.inc;
+
+--exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1
+--let $rpl_server_number= 2
+--source include/rpl_stop_server.inc
+
+--error 1
+--exec STREAM_TYPE=xbstream FRM=1 DEBUG_SIGNAL=1 suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--exec STREAM_TYPE=xbstream suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
+--let $rpl_server_number= 2
+--source include/rpl_start_server.inc
+
+connection server_2;
+select count(*) from db1.t1;
+
+connection server_1;
+drop database db1;
+connection server_2;
+drop database db1;
+
+source suite/rocksdb_hotbackup/include/cleanup.inc;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
index 52456a68140..18816c34446 100644
--- a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
@@ -1,25 +1,7 @@
+--source include/have_rocksdb.inc
+--source xbstream.inc
+let SEARCH_FILE= $MYSQL_TMP_DIR/myrocks_hotbackup_copy_log;
+let SEARCH_PATTERN= Direct I/O: 0;
+--source include/search_pattern_in_file.inc
-source suite/rocksdb_hotbackup/include/setup.inc;
-
---exec suite/rocksdb_hotbackup/include/load_data.sh 2>&1
---let $rpl_server_number= 2
---source include/rpl_stop_server.inc
-
---error 1
---exec STREAM_TYPE=xbstream FRM=1 DEBUG_SIGNAL=1 suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
-
---exec STREAM_TYPE=xbstream suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
-
---let $rpl_server_number= 2
---source include/rpl_start_server.inc
-
-connection server_2;
-select count(*) from db1.t1;
-
-connection server_1;
-drop database db1;
-connection server_2;
-drop database db1;
-
-source suite/rocksdb_hotbackup/include/cleanup.inc;
-
+--exec suite/rocksdb_hotbackup/include/clean_tmpfiles.sh
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt
new file mode 100644
index 00000000000..4ab98aeabe1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt
@@ -0,0 +1 @@
+--rocksdb_use_direct_reads=ON --rocksdb_use_direct_io_for_flush_and_compaction=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test
new file mode 100644
index 00000000000..41357d68415
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test
@@ -0,0 +1,7 @@
+--source include/have_rocksdb.inc
+--source xbstream.inc
+let SEARCH_FILE= $MYSQL_TMP_DIR/myrocks_hotbackup_copy_log;
+let SEARCH_PATTERN= Direct I/O: 1;
+--source include/search_pattern_in_file.inc
+
+--exec suite/rocksdb_hotbackup/include/clean_tmpfiles.sh
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/combinations b/storage/rocksdb/mysql-test/rocksdb_rpl/combinations
index fe97111940a..05da5c7b8ee 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/combinations
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/combinations
@@ -5,4 +5,8 @@ rocksdb_write_policy=write_committed
[row-write-prepared]
binlog-format=row
rocksdb_write_policy=write_prepared
-rocksdb_commit_time_batch_for_recovery=on
+
+[row-write-committed-slave-gtid-optimized]
+binlog-format=row
+rocksdb_write_policy=write_committed
+slave_gtid_info=optimized
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc
new file mode 100644
index 00000000000..f0c0134e4d1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc
@@ -0,0 +1,37 @@
+
+-- let $engine = ROCKSDB
+
+call mtr.add_suppression("Recovery from master pos");
+
+-- let $debug_option = crash_before_update_pos
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_after_update_pos_before_apply
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_before_writing_xid
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = half_binlogged_transaction
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_before
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_after_log
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_after_prepare
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_reset.inc
+-- let $debug_option = crash_commit_after
+-- source extra/rpl_tests/rpl_gtid_crash_safe.inc
+
+-- source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result
new file mode 100644
index 00000000000..1b41405fd5e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result
@@ -0,0 +1,282 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+c1 c2
+1 1
+2 2
+3 3
+create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t2 values(1,1),(2,2),(3,3);
+select * from t2;
+c1 c2
+1 1
+2 2
+3 3
+create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t3 values(1,1),(2,2),(3,3);
+select * from t3;
+c1 c2
+1 1
+2 2
+3 3
+SET GLOBAL enable_blind_replace=ON;
+create trigger trg before insert on t2 for each row set @a:=1;
+alter table t3 add constraint slave_unique_key unique (c2);
+connect slave
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 1
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(1,11);
+replace into t1 values(2,22);
+replace into t1 values(3,33);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 22
+3 33
+connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 22
+3 33
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 2
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(2,44),(3,55);
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 44
+3 55
+connect slave
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t1;
+c1 c2
+1 11
+2 44
+3 55
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 3
+connect master
+update t1 set c2=66 where c1=3;
+select * from t1;
+c1 c2
+1 11
+2 44
+3 66
+connect slave
+select * from t1;
+c1 c2
+1 11
+2 44
+3 66
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 4
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t2 values(1,111);
+replace into t2 values(2,222);
+replace into t2 values(3,333);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+select * from t2;
+c1 c2
+1 111
+2 222
+3 333
+connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t2;
+c1 c2
+1 111
+2 222
+3 333
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+Case 5
+connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t3 values(1,1111);
+replace into t3 values(2,2222);
+replace into t3 values(3,3333);
+select * from t3;
+c1 c2
+1 1111
+2 2222
+3 3333
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+true
+connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+read_free
+false
+select * from t3;
+c1 c2
+1 1111
+2 2222
+3 3333
+select * from t3 use index (slave_unique_key);
+c1 c2
+1 1111
+2 2222
+3 3333
+Case 6
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query # # use `test`; create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # use `test`; create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # use `test`; create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t2)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t3)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000001 # Query # # use `test`; create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # use `test`; create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # use `test`; create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` trigger trg before insert on t2 for each row set @a:=1
+slave-bin.000001 # Query # # use `test`; alter table t3 add constraint slave_unique_key unique (c2)
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t1)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t2)
+slave-bin.000001 # Update_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Delete_rows # # table_id: #
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Delete_rows # # table_id: #
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+slave-bin.000001 # Query # # BEGIN
+slave-bin.000001 # Table_map # # table_id: # (test.t3)
+slave-bin.000001 # Delete_rows # # table_id: #
+slave-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+slave-bin.000001 # Xid # # COMMIT /* XID */
+drop table t1;
+drop table t2;
+drop table t3;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+SET GLOBAL enable_blind_replace=DEFAULT;
+SET GLOBAL enable_blind_replace=DEFAULT;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result
new file mode 100644
index 00000000000..a770822285b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result
@@ -0,0 +1,165 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Error_code: 1032");
+create table t1 (a int primary key, b int, c int) engine = rocksdb;
+create table t2 (a int unique, b int, c int) engine = rocksdb;
+create table t3 (a int, b int, c int, key(a)) engine = rocksdb;
+create table t4 (a int, b int, c int) engine = rocksdb;
+insert into t1 values(1, 1, 1);
+insert into t2 values(1, 1, 1);
+insert into t3 values(1, 1, 1);
+insert into t4 values(1, 1, 1);
+include/sync_slave_sql_with_master.inc
+set @@sql_log_bin = 0;
+update t1 set c = 2;
+update t2 set c = 2;
+update t3 set c = 2;
+update t4 set c = 2;
+set @@sql_log_bin = 1;
+update t1 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t1 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t2 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t2 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t3 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t3 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t4 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t4 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.slave_rows_search_algorithms = 'INDEX_SCAN,TABLE_SCAN,HASH_SCAN';
+include/start_slave.inc
+create table t1 (a int primary key, b int, c int) engine = rocksdb;
+create table t2 (a int unique, b int, c int) engine = rocksdb;
+create table t3 (a int, b int, c int, key(a)) engine = rocksdb;
+create table t4 (a int, b int, c int) engine = rocksdb;
+insert into t1 values(1, 1, 1);
+insert into t2 values(1, 1, 1);
+insert into t3 values(1, 1, 1);
+insert into t4 values(1, 1, 1);
+include/sync_slave_sql_with_master.inc
+set @@sql_log_bin = 0;
+update t1 set c = 2;
+update t2 set c = 2;
+update t3 set c = 2;
+update t4 set c = 2;
+set @@sql_log_bin = 1;
+update t1 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t1 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t2 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t2 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t3 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t3 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+update t4 set b = 2;
+include/wait_for_slave_sql_error.inc [errno=1032]
+set @@sql_log_bin = 0;
+update t4 set c = 1;
+set @@sql_log_bin = 1;
+include/stop_slave.inc
+include/start_slave.inc
+include/sync_slave_sql_with_master.inc
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+select * from t1;
+a b c
+1 2 1
+select * from t2;
+a b c
+1 2 1
+select * from t3;
+a b c
+1 2 1
+select * from t4;
+a b c
+1 2 1
+drop table t1;
+drop table t2;
+drop table t3;
+drop table t4;
+include/sync_slave_sql_with_master.inc
+include/stop_slave.inc
+set @@global.slave_rows_search_algorithms = DEFAULT;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result
new file mode 100644
index 00000000000..a518de2b6e3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result
@@ -0,0 +1,361 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Recovery from master pos");
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr
+2 test uuid:4
+SET GLOBAL debug = '+d,crash_before_update_pos';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr
+2 test uuid:4
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 mtr
+2 test uuid:5
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_after_update_pos_before_apply';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_before_writing_xid';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,half_binlogged_transaction';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_before';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after_log';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after_prepare';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_reset.inc
+create table t1(a int, PRIMARY KEY(a)) ENGINE=ROCKSDB;
+insert into t1 values(1);
+insert into t1 values(2);
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+SET GLOBAL debug = '+d,crash_commit_after';
+insert into t1 values(3);
+include/rpl_reconnect.inc
+SET GLOBAL debug = ``;
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:3
+use test;
+select * from t1;
+a
+1
+2
+change master to master_auto_position = 1;
+include/start_slave.inc
+rename table t1 to test1;
+use test;
+select * from test1;
+a
+1
+2
+3
+use test;
+select * from test1;
+a
+1
+2
+3
+drop table test1;
+include/stop_slave.inc
+change master to master_auto_position = 0;
+include/start_slave.inc
+use mysql;
+select * from slave_gtid_info;
+Id Database_name Last_gtid
+1 test uuid:4
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result
new file mode 100644
index 00000000000..1f6acf32872
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result
@@ -0,0 +1,43 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+Make changes in master
+create table test1 (a int primary key, b int) engine=rocksdb;
+insert into test1 values (1, 1);
+Make sure slave is up-to-date and mysql.slave_gtid_info is good
+select * from test1;
+a b
+1 1
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+id database_name last_gtid
+1 test UUID:2
+Make changes in master
+insert into test1 values (2, 2);
+Make sure slave is up-to-date and mysql.slave_gtid_info is good
+select @@slave_gtid_info;
+@@slave_gtid_info
+OPTIMIZED
+select * from test1;
+a b
+1 1
+2 2
+select * from mysql.slave_gtid_info;
+Id Database_name Last_gtid
+1 test UUID:3
+Make changes in master
+insert into test1 values (3, 3);
+insert into test1 values (4, 4);
+Make sure slave is up-to-date and mysql.slave_gtid_info is good
+select * from test1;
+a b
+1 1
+2 2
+3 3
+4 4
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+id database_name last_gtid
+1 test UUID:5
+DROP TABLE IF EXISTS test1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/combinations b/storage/rocksdb/mysql-test/rocksdb_rpl/t/combinations
deleted file mode 100644
index f09d338c357..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/combinations
+++ /dev/null
@@ -1,2 +0,0 @@
-[row]
-binlog-format=row
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
index 3896a822872..b0fc68b84b2 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def
@@ -28,4 +28,5 @@ rpl_gtid_crash_safe : Didn't try with MariaDB, yet
rpl_gtid_crash_safe_wal_corrupt : Didn't try with MariaDB, yet
rpl_rocksdb_snapshot : Didn't try with MariaDB, yet
rpl_rocksdb_snapshot_without_gtid : Didn't try with MariaDB, yet
-
+rpl_rocksdb_slave_gtid_info_optimized: requires slave-gtid-info=optimized which is an FB/MySQL-only feature
+rocksdb_slave_check_before_image_consistency: requires slave_check_before_image_consistency feature
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test
new file mode 100644
index 00000000000..82b231d489a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test
@@ -0,0 +1,149 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_debug.inc
+
+connection master;
+SET @prior_rocksdb_perf_context_level = @@rocksdb_perf_context_level;
+SET GLOBAL rocksdb_perf_context_level=3;
+SET GLOBAL enable_blind_replace=ON;
+
+# Create and insert some rows in a table
+create table t1(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t1 values(1,1),(2,2),(3,3);
+select * from t1;
+
+# Create table which has a trigger only in slave
+create table t2(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t2 values(1,1),(2,2),(3,3);
+select * from t2;
+
+# Create table which has a secondary key only in slave
+create table t3(c1 int,c2 int, primary key (c1)) engine=rocksdb;
+insert into t3 values(1,1),(2,2),(3,3);
+select * from t3;
+
+sync_slave_with_master;
+
+# Enable blind replace in both slave and master
+connection slave;
+SET GLOBAL enable_blind_replace=ON;
+create trigger trg before insert on t2 for each row set @a:=1;
+alter table t3 add constraint slave_unique_key unique (c2);
+
+connection master;
+
+sync_slave_with_master;
+--echo connect slave
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 1 - 'replace into' on a table with no triggers or secondary keys. Blind replace optimization should kick in both in master and slave
+--echo Case 1
+connection master;
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+replace into t1 values(1,11);
+replace into t1 values(2,22);
+replace into t1 values(3,33);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+select * from t1;
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 2 - Multiple replaces in a single statement. blind replace optimization should kick in
+connection master;
+--echo Case 2
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t1 values(2,44),(3,55);
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 2 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t1;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 3 - A regular update. This is not a blind replace
+--echo Case 3
+connection master;
+--echo connect master
+update t1 set c2=66 where c1=3;
+select * from t1;
+
+sync_slave_with_master;
+--echo connect slave
+select * from t1;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 4 - Slave has trigger on its table. No triggers on the table in master.
+# Blind replace optimization should kick in on master.
+# Slave should convert this statement into a regular update
+--echo Case 4
+connection master;
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t2 values(1,111);
+replace into t2 values(2,222);
+replace into t2 values(3,333);
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t2;
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t2;
+
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+# Case 5 - Slave has secondary keys on the table. No secondary keys on the table in master
+# Blind replace optimization should kick in on master.
+# Slave should convert this statement into a regular delete_insert
+--echo Case 5
+connection master;
+--echo connect master
+select variable_value into @d from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+replace into t3 values(1,1111);
+replace into t3 values(2,2222);
+replace into t3 values(3,3333);
+select * from t3;
+
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+
+sync_slave_with_master;
+--echo connect slave
+select case when variable_value-@d > 3 then 'false' else 'true' end as read_free from information_schema.global_status where variable_name='rocksdb_num_get_for_update_calls';
+select * from t3;
+select * from t3 use index (slave_unique_key);
+
+# Case 6 - Just to verify all binlog events.
+# blind replace will generate a write_rows event.
+# Or else, it will be a update_rows event or a delete_rows_write_rows event
+--echo Case 6
+connection master;
+--source include/show_binlog_events.inc
+
+connection slave;
+--source include/show_binlog_events.inc
+
+# Cleanup
+connection master;
+drop table t1;
+drop table t2;
+drop table t3;
+SET GLOBAL rocksdb_perf_context_level = @prior_rocksdb_perf_context_level;
+SET GLOBAL enable_blind_replace=DEFAULT;
+
+connection slave;
+SET GLOBAL enable_blind_replace=DEFAULT;
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt
new file mode 100644
index 00000000000..78b517e93ab
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt
@@ -0,0 +1 @@
+--slave_check_before_image_consistency=ON
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test
new file mode 100644
index 00000000000..d7db127a207
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test
@@ -0,0 +1,22 @@
+source include/master-slave.inc;
+source include/have_binlog_format_row.inc;
+
+call mtr.add_suppression("Error_code: 1032");
+
+let $engine= rocksdb;
+
+source extra/rpl_tests/rpl_slave_check_before_image_consistency.inc;
+
+# check detection with HASH_SCAN enabled
+connection slave;
+source include/stop_slave.inc;
+set @@global.slave_rows_search_algorithms = 'INDEX_SCAN,TABLE_SCAN,HASH_SCAN';
+source include/start_slave.inc;
+source extra/rpl_tests/rpl_slave_check_before_image_consistency.inc;
+
+# cleanup
+source include/stop_slave.inc;
+set @@global.slave_rows_search_algorithms = DEFAULT;
+source include/start_slave.inc;
+
+source include/rpl_end.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
index f1b1b16704f..5a3e665a025 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
@@ -4,39 +4,8 @@
-- source include/have_debug.inc
-- source include/not_valgrind.inc
--- let $engine = ROCKSDB
+if (`select count(*) = 1 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) {
+ --skip Test does not support row_write_committed_slave_gtid_optimized policy due to subtle behavioral differences. rpl_gtid_crash_safe_optimized covers slave_gtid_info=optimized.
+}
-call mtr.add_suppression("Recovery from master pos");
-
--- let $debug_option = crash_before_update_pos
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_reset.inc
--- let $debug_option = crash_after_update_pos_before_apply
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_reset.inc
--- let $debug_option = crash_before_writing_xid
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_reset.inc
--- let $debug_option = half_binlogged_transaction
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_reset.inc
--- let $debug_option = crash_commit_before
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_reset.inc
--- let $debug_option = crash_commit_after_log
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_reset.inc
--- let $debug_option = crash_commit_after_prepare
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_reset.inc
--- let $debug_option = crash_commit_after
--- source extra/rpl_tests/rpl_gtid_crash_safe.inc
-
--- source include/rpl_end.inc
+-- source ../include/rpl_gtid_crash_safe.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt
new file mode 100644
index 00000000000..397310d37b4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt
new file mode 100644
index 00000000000..e41dcc5eecd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt
@@ -0,0 +1,2 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --rocksdb_enable_2pc=OFF
+--sync_binlog=1000 --relay_log_recovery=1 --slave_gtid_info=optimized
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test
new file mode 100644
index 00000000000..c262403286c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test
@@ -0,0 +1,11 @@
+-- source include/have_rocksdb.inc
+-- source include/have_gtid.inc
+-- source include/master-slave.inc
+-- source include/have_debug.inc
+-- source include/not_valgrind.inc
+
+if (`select count(*) = 0 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) {
+ --skip Test requires row_write_committed_slave_gtid_optimized policy where slave_gtid_info=optimized
+}
+
+-- source ../include/rpl_gtid_crash_safe.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt
new file mode 100644
index 00000000000..c747adc94d5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt
new file mode 100644
index 00000000000..6cde3c553d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt
@@ -0,0 +1 @@
+--gtid_mode=ON --enforce_gtid_consistency --log_bin --log_slave_updates --slave-gtid-info=optimized
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test
new file mode 100644
index 00000000000..c8a0c8daf10
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test
@@ -0,0 +1,51 @@
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_binlog_format_row.inc
+
+if (`select count(*) = 0 from information_schema.global_variables where variable_name = 'slave_gtid_info' and variable_value = 'optimized';`) {
+ --skip Test requires row_write_committed_slave_gtid_optimized policy where slave_gtid_info=optimized
+}
+
+--echo Make changes in master
+create table test1 (a int primary key, b int) engine=rocksdb;
+insert into test1 values (1, 1);
+
+--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good
+sync_slave_with_master slave;
+connection slave;
+select * from test1;
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+
+--echo Make changes in master
+connection master;
+insert into test1 values (2, 2);
+
+--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good
+sync_slave_with_master slave;
+connection slave;
+select @@slave_gtid_info;
+select * from test1;
+
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+select * from mysql.slave_gtid_info;
+
+--echo Make changes in master
+connection master;
+insert into test1 values (3, 3);
+insert into test1 values (4, 4);
+
+--echo Make sure slave is up-to-date and mysql.slave_gtid_info is good
+sync_slave_with_master slave;
+connection slave;
+select * from test1;
+
+-- replace_regex /[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}/UUID/
+select id, database_name, last_gtid from mysql.slave_gtid_info;
+
+connection master;
+DROP TABLE IF EXISTS test1;
+
+sync_slave_with_master slave;
+
+--source include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result
new file mode 100644
index 00000000000..90fc99ef21a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_CACHE_DUMP;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CACHE_DUMP to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CACHE_DUMP = 444;
+ERROR HY000: Variable 'rocksdb_cache_dump' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result
new file mode 100644
index 00000000000..9d098385789
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result
@@ -0,0 +1,22 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1.0);
+INSERT INTO valid_values VALUES(0.0);
+INSERT INTO valid_values VALUES(0.5);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES(2.0);
+INSERT INTO invalid_values VALUES(-0.5);
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+INSERT INTO invalid_values VALUES('\'0.5\'');
+SET @start_global_value = @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO = 444;
+ERROR HY000: Variable 'rocksdb_cache_high_pri_pool_ratio' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result
new file mode 100644
index 00000000000..819425c8bce
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY = 444;
+ERROR HY000: Variable 'rocksdb_cache_index_and_filter_with_high_priority' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result
index 1d409bbedb4..8d3f4a6e5af 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result
@@ -9,11 +9,11 @@ INSERT INTO invalid_values VALUES('\'bbb\'');
SET @start_global_value = @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SELECT @start_global_value;
@start_global_value
-0
+1
SET @start_session_value = @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SELECT @start_session_value;
@start_session_value
-0
+1
'# Setting to valid values in global scope#'
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1;
@@ -24,7 +24,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@ -34,7 +34,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@ -44,7 +44,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@ -54,7 +54,7 @@ SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
'# Setting to valid values in session scope#'
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1;
@@ -65,7 +65,7 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@ -75,7 +75,7 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@ -85,7 +85,7 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@ -95,27 +95,27 @@ SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
'# Testing with invalid values in global scope #'
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'aaa'"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'aaa';
Got one of the listed errors
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'bbb'"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'bbb';
Got one of the listed errors
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_global_value;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_session_value;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
-0
+1
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result
new file mode 100644
index 00000000000..b840baf29f8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result
@@ -0,0 +1,6 @@
+SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf';
+ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of 'nonexistent_cf'
+SET @@global.ROCKSDB_DELETE_CF = '__system__';
+ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of '__system__'
+SET @@global.ROCKSDB_DELETE_CF = 'default';
+ERROR 42000: Variable 'rocksdb_delete_cf' can't be set to the value of 'default'
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result
new file mode 100644
index 00000000000..ede1690f776
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result
@@ -0,0 +1,75 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 1"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 1;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 0"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 0;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to on"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = on;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to off"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = off;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@session.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 444;
+ERROR HY000: Variable 'rocksdb_enable_insert_with_update_caching' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 'aaa'"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING to 'bbb'"
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+SET @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING = @start_global_value;
+SELECT @@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING;
+@@global.ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result
new file mode 100644
index 00000000000..788379927cf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result
@@ -0,0 +1,58 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('PK_SK');
+INSERT INTO valid_values VALUES('OFF');
+INSERT INTO valid_values VALUES('PK_ONLY');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('a');
+SET @start_global_value = @@global.ROCKSDB_READ_FREE_RPL;
+SELECT @start_global_value;
+@start_global_value
+OFF
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to PK_SK"
+SET @@global.ROCKSDB_READ_FREE_RPL = PK_SK;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+PK_SK
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to OFF"
+SET @@global.ROCKSDB_READ_FREE_RPL = OFF;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to PK_ONLY"
+SET @@global.ROCKSDB_READ_FREE_RPL = PK_ONLY;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+PK_ONLY
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_READ_FREE_RPL = DEFAULT;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_READ_FREE_RPL = 444;
+ERROR HY000: Variable 'rocksdb_read_free_rpl' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL to a"
+SET @@global.ROCKSDB_READ_FREE_RPL = a;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+SET @@global.ROCKSDB_READ_FREE_RPL = @start_global_value;
+SELECT @@global.ROCKSDB_READ_FREE_RPL;
+@@global.ROCKSDB_READ_FREE_RPL
+OFF
+SET GLOBAL ROCKSDB_READ_FREE_RPL=DEFAULT;
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result
index b218fe034aa..2643eb08617 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result
@@ -1,15 +1,13 @@
+call mtr.add_suppression(".*Invalid pattern in rocksdb_read_free_rpl_tables.*");
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES('a');
INSERT INTO valid_values VALUES('b');
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'*\'');
SET @start_global_value = @@global.ROCKSDB_READ_FREE_RPL_TABLES;
SELECT @start_global_value;
@start_global_value
-
-SET @start_session_value = @@session.ROCKSDB_READ_FREE_RPL_TABLES;
-SELECT @start_session_value;
-@start_session_value
-
+.*
'# Setting to valid values in global scope#'
"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to a"
SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = a;
@@ -20,7 +18,7 @@ a
SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT;
SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
@@global.ROCKSDB_READ_FREE_RPL_TABLES
-
+.*
"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to b"
SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = b;
SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
@@ -30,36 +28,22 @@ b
SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT;
SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
@@global.ROCKSDB_READ_FREE_RPL_TABLES
-
-'# Setting to valid values in session scope#'
-"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL_TABLES to a"
-SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = a;
-SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES;
-@@session.ROCKSDB_READ_FREE_RPL_TABLES
-a
-"Setting the session scope variable back to default"
-SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT;
-SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES;
-@@session.ROCKSDB_READ_FREE_RPL_TABLES
-
-"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL_TABLES to b"
-SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = b;
-SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES;
-@@session.ROCKSDB_READ_FREE_RPL_TABLES
-b
-"Setting the session scope variable back to default"
-SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = DEFAULT;
-SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES;
-@@session.ROCKSDB_READ_FREE_RPL_TABLES
-
+.*
+"Trying to set variable @@session.ROCKSDB_READ_FREE_RPL_TABLES to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = 444;
+ERROR HY000: Variable 'rocksdb_read_free_rpl_tables' is a GLOBAL variable and should be set with SET GLOBAL
'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_READ_FREE_RPL_TABLES to '*'"
+SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = '*';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
+@@global.ROCKSDB_READ_FREE_RPL_TABLES
+.*
SET @@global.ROCKSDB_READ_FREE_RPL_TABLES = @start_global_value;
SELECT @@global.ROCKSDB_READ_FREE_RPL_TABLES;
@@global.ROCKSDB_READ_FREE_RPL_TABLES
-
-SET @@session.ROCKSDB_READ_FREE_RPL_TABLES = @start_session_value;
-SELECT @@session.ROCKSDB_READ_FREE_RPL_TABLES;
-@@session.ROCKSDB_READ_FREE_RPL_TABLES
-
+.*
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=NULL;
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=DEFAULT;
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result
new file mode 100644
index 00000000000..e51df4f6834
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result
@@ -0,0 +1,97 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 1"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 1;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 0"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 0;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to on"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = on;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to off"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = off;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to true"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = true;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to false"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = false;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@session.ROCKSDB_ROLLBACK_ON_TIMEOUT to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ROLLBACK_ON_TIMEOUT = 444;
+ERROR HY000: Variable 'rocksdb_rollback_on_timeout' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 'aaa'"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT to 'bbb'"
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+SET @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT = @start_global_value;
+SELECT @@global.ROCKSDB_ROLLBACK_ON_TIMEOUT;
+@@global.ROCKSDB_ROLLBACK_ON_TIMEOUT
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result
index 3e169671cc0..a3749b75e47 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result
@@ -61,5 +61,7 @@ SET @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES = @start_session_value;
SELECT @@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES;
@@session.ROCKSDB_SKIP_UNIQUE_CHECK_TABLES
.*
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=NULL;
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=DEFAULT;
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result
new file mode 100644
index 00000000000..d8d218fe3e8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(4);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_STATS_LEVEL;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 0"
+SET @@global.ROCKSDB_STATS_LEVEL = 0;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 4"
+SET @@global.ROCKSDB_STATS_LEVEL = 4;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+4
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 2"
+SET @@global.ROCKSDB_STATS_LEVEL = 2;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@session.ROCKSDB_STATS_LEVEL to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_STATS_LEVEL = 444;
+ERROR HY000: Variable 'rocksdb_stats_level' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 'aaa'"
+SET @@global.ROCKSDB_STATS_LEVEL = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 'bbb'"
+SET @@global.ROCKSDB_STATS_LEVEL = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '-1'"
+SET @@global.ROCKSDB_STATS_LEVEL = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '101'"
+SET @@global.ROCKSDB_STATS_LEVEL = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+"Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '484436'"
+SET @@global.ROCKSDB_STATS_LEVEL = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+SET @@global.ROCKSDB_STATS_LEVEL = @start_global_value;
+SELECT @@global.ROCKSDB_STATS_LEVEL;
+@@global.ROCKSDB_STATS_LEVEL
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
index ba24fafd0ec..dcc7e1f68db 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
@@ -114,6 +114,10 @@ ERROR 42000: Variable 'rocksdb_update_cf_options' can't be set to the value of '
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
+SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=67108864;target_file_size_base=67108864};';
+SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=67108864;target_file_size_base=67108864};';
+SET @@global.rocksdb_update_cf_options = 'cf2={write_buffer_size=67108864;target_file_size_base=67108864;max_bytes_for_level_multiplier=10.000000};';
+SET @@global.rocksdb_update_cf_options = 'cf3={write_buffer_size=67108864;target_file_size_base=67108864};';
SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
@@global.rocksdb_update_cf_options
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def
new file mode 100644
index 00000000000..efa82ff6184
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def
@@ -0,0 +1,5 @@
+# Disabled tests
+
+rocksdb_read_free_rpl_basic : MariaDB: Read-Free replication is not supported
+rocksdb_read_free_rpl_tables_basic : MariaDB: Read-Free replication is not supported
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test
new file mode 100644
index 00000000000..70557621828
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_CACHE_DUMP
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test
new file mode 100644
index 00000000000..d97c4e20f82
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test
@@ -0,0 +1,24 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1.0);
+INSERT INTO valid_values VALUES(0.0);
+INSERT INTO valid_values VALUES(0.5);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES(2.0);
+INSERT INTO invalid_values VALUES(-0.5);
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+INSERT INTO invalid_values VALUES('\'0.5\'');
+
+--let $sys_var=ROCKSDB_CACHE_HIGH_PRI_POOL_RATIO
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test
new file mode 100644
index 00000000000..5ed7927f233
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_CACHE_INDEX_AND_FILTER_WITH_HIGH_PRIORITY
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt
new file mode 100644
index 00000000000..ae43ab332ee
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt
@@ -0,0 +1 @@
+--loose-rocksdb-dummy-option-instead-of-force-restart
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test
new file mode 100644
index 00000000000..0875e492b2c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test
@@ -0,0 +1,75 @@
+--disable_query_log
+call mtr.add_suppression("Failed to drop column family");
+call mtr.add_suppression("Column family '[a-z_]+' not found");
+--enable_query_log
+
+--source include/have_rocksdb.inc
+
+# should fail for not existing CF
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf'
+
+# should fail for default system cf
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = '__system__'
+
+# should fail for default cf
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'default'
+
+--disable_parsing
+# should succeed for not existing CF
+--eval SET @@global.ROCKSDB_DELETE_CF = 'nonexistent_cf'
+
+# should fail for default system cf
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = '__system__'
+
+alter table mysql.slave_worker_info engine = MyISAM;
+alter table mysql.slave_relay_log_info engine = MyISAM;
+alter table mysql.slave_gtid_info engine = MyISAM;
+alter table mysql.slave_master_info engine = MyISAM;
+
+select count(*) from information_schema.rocksdb_ddl where cf = 'default';
+
+# should fail for default cf
+--error ER_GET_ERRMSG
+--eval SET @@global.ROCKSDB_DELETE_CF = 'default'
+
+CREATE TABLE cf_deletion_test_table1 (
+ id1 int(10) unsigned NOT NULL DEFAULT '0',
+ id2 int(10) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (id1) COMMENT 'cf_primary_key',
+ KEY `sec_key` (id2) COMMENT 'cf_secondary_key'
+) ENGINE=ROCKSDB;
+
+# should fail, CFs are still in use
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key'
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_secondary_key'
+
+CREATE TABLE cf_deletion_test_table2 (
+ id1 int(10) unsigned NOT NULL DEFAULT '0',
+ PRIMARY KEY (id1) COMMENT 'cf_primary_key'
+) ENGINE=ROCKSDB;
+
+DROP TABLE cf_deletion_test_table1;
+
+# should fail, still used by second table
+--error ER_WRONG_VALUE_FOR_VAR
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key'
+
+# should succeed, no one is using it anymore
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_secondary_key'
+
+DROP TABLE cf_deletion_test_table2;
+
+# should succeed now
+--eval SET @@global.ROCKSDB_DELETE_CF = 'cf_primary_key'
+
+alter table mysql.slave_worker_info engine = ROCKSDB;
+alter table mysql.slave_relay_log_info engine = ROCKSDB;
+alter table mysql.slave_gtid_info engine = ROCKSDB;
+alter table mysql.slave_master_info engine = ROCKSDB;
+--enable_parsing
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test
new file mode 100644
index 00000000000..cd643cfef23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ENABLE_INSERT_WITH_UPDATE_CACHING
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test
new file mode 100644
index 00000000000..f37f75b4ac5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES('PK_SK');
+INSERT INTO valid_values VALUES('OFF');
+INSERT INTO valid_values VALUES('PK_ONLY');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('a');
+
+--let $sys_var=ROCKSDB_READ_FREE_RPL
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+SET GLOBAL ROCKSDB_READ_FREE_RPL=DEFAULT;
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test
index 71f42a47f4b..a2c900c91a9 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test
@@ -1,15 +1,20 @@
--source include/have_rocksdb.inc
+call mtr.add_suppression(".*Invalid pattern in rocksdb_read_free_rpl_tables.*");
+
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES('a');
INSERT INTO valid_values VALUES('b');
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'*\'');
--let $sys_var=ROCKSDB_READ_FREE_RPL_TABLES
--let $read_only=0
---let $session=1
--source include/rocksdb_sys_var.inc
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=NULL;
+SET GLOBAL ROCKSDB_READ_FREE_RPL_TABLES=DEFAULT;
+
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test
new file mode 100644
index 00000000000..793b7752198
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ROLLBACK_ON_TIMEOUT
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test
index 3fe265ae930..50c2354d883 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test
@@ -11,5 +11,8 @@ CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
--let $session=1
--source include/rocksdb_sys_var.inc
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=NULL;
+SET GLOBAL ROCKSDB_SKIP_UNIQUE_CHECK_TABLES=DEFAULT;
+
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test
new file mode 100644
index 00000000000..89b0878fd0c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(4);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_STATS_LEVEL
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
index 533b2db8204..9462e40aaf0 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
@@ -55,6 +55,11 @@ SELECT @@global.rocksdb_update_cf_options;
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+# Save these off to reset later
+--let $ORIG_WRITE_BUFFER_SIZE=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE'`
+--let $ORIG_TARGET_FILE_SIZE_BASE=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE'`
+--let $ORIG_MAX_BYTES_FOR_LEVEL_MULTIPLIER=`SELECT VALUE FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER'`
+
# All good. Use default CF.
SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=8m;target_file_size_base=2m};';
SELECT @@global.rocksdb_update_cf_options;
@@ -100,6 +105,12 @@ SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FIL
--eval SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
SELECT @@global.rocksdb_update_cf_options;
+# Reset the cf options so the test passes with --repeat=2
+--eval SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};'
+--eval SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};'
+--eval SET @@global.rocksdb_update_cf_options = 'cf2={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE;max_bytes_for_level_multiplier=$ORIG_MAX_BYTES_FOR_LEVEL_MULTIPLIER};'
+--eval SET @@global.rocksdb_update_cf_options = 'cf3={write_buffer_size=$ORIG_WRITE_BUFFER_SIZE;target_file_size_base=$ORIG_TARGET_FILE_SIZE_BASE};'
+
SET @@global.rocksdb_update_cf_options = NULL;
SELECT @@global.rocksdb_update_cf_options;
diff --git a/storage/rocksdb/nosql_access.cc b/storage/rocksdb/nosql_access.cc
new file mode 100644
index 00000000000..e1b5d0f69a2
--- /dev/null
+++ b/storage/rocksdb/nosql_access.cc
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2019, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#define MYSQL_SERVER 1
+
+/* This C++ file's header */
+#include "./nosql_access.h"
+
+/* C++ standard header files */
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+/* C standard header files */
+#include <ctype.h>
+
+/* MySQL header files */
+#include "../../sql/item.h"
+#include "../../sql/sql_base.h"
+#include "../../sql/sql_class.h"
+#include "../../sql/strfunc.h"
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./rdb_buff.h"
+#include "./rdb_datadic.h"
+
+namespace myrocks {
+
+bool rocksdb_handle_single_table_select(THD * /* unused */,
+ st_select_lex * /* unused */) {
+ return false;
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/nosql_access.h b/storage/rocksdb/nosql_access.h
new file mode 100644
index 00000000000..70aaa400668
--- /dev/null
+++ b/storage/rocksdb/nosql_access.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2019, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* C++ standard header files */
+#include <array>
+#include <string>
+#include <vector>
+
+/* C standard header files */
+#include <ctype.h>
+
+/* MySQL header files */
+#include "../../sql/protocol.h"
+#include "./sql_string.h"
+
+#pragma once
+
+namespace myrocks {
+
+// Not needed in MyRocks:
+//bool rocksdb_handle_single_table_select(THD *thd, st_select_lex *select_lex);
+
+} // namespace myrocks
diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc
index 417ef447a69..d96e25e914c 100644
--- a/storage/rocksdb/properties_collector.cc
+++ b/storage/rocksdb/properties_collector.cc
@@ -49,12 +49,19 @@ my_bool rocksdb_compaction_sequential_deletes_count_sd = false;
Rdb_tbl_prop_coll::Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
const Rdb_compact_params &params,
- const uint32_t &cf_id,
- const uint8_t &table_stats_sampling_pct)
- : m_cf_id(cf_id), m_ddl_manager(ddl_manager), m_last_stats(nullptr),
- m_rows(0l), m_window_pos(0l), m_deleted_rows(0l), m_max_deleted_rows(0l),
- m_file_size(0), m_params(params),
- m_cardinality_collector(table_stats_sampling_pct), m_recorded(false) {
+ const uint32_t cf_id,
+ const uint8_t table_stats_sampling_pct)
+ : m_cf_id(cf_id),
+ m_ddl_manager(ddl_manager),
+ m_last_stats(nullptr),
+ m_rows(0l),
+ m_window_pos(0l),
+ m_deleted_rows(0l),
+ m_max_deleted_rows(0l),
+ m_file_size(0),
+ m_params(params),
+ m_cardinality_collector(table_stats_sampling_pct),
+ m_recorded(false) {
DBUG_ASSERT(ddl_manager != nullptr);
m_deleted_rows_window.resize(m_params.m_window, false);
@@ -148,35 +155,36 @@ Rdb_index_stats *Rdb_tbl_prop_coll::AccessStats(const rocksdb::Slice &key) {
void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key,
const rocksdb::Slice &value,
const rocksdb::EntryType &type,
- const uint64_t &file_size) {
+ const uint64_t file_size) {
auto stats = AccessStats(key);
stats->m_data_size += key.size() + value.size();
// Incrementing per-index entry-type statistics
switch (type) {
- case rocksdb::kEntryPut:
- stats->m_rows++;
- break;
- case rocksdb::kEntryDelete:
- stats->m_entry_deletes++;
- break;
- case rocksdb::kEntrySingleDelete:
- stats->m_entry_single_deletes++;
- break;
- case rocksdb::kEntryMerge:
- stats->m_entry_merges++;
- break;
- case rocksdb::kEntryOther:
- stats->m_entry_others++;
- break;
- default:
- // NO_LINT_DEBUG
- sql_print_error("RocksDB: Unexpected entry type found: %u. "
- "This should not happen so aborting the system.",
- type);
- abort();
- break;
+ case rocksdb::kEntryPut:
+ stats->m_rows++;
+ break;
+ case rocksdb::kEntryDelete:
+ stats->m_entry_deletes++;
+ break;
+ case rocksdb::kEntrySingleDelete:
+ stats->m_entry_single_deletes++;
+ break;
+ case rocksdb::kEntryMerge:
+ stats->m_entry_merges++;
+ break;
+ case rocksdb::kEntryOther:
+ stats->m_entry_others++;
+ break;
+ default:
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Unexpected entry type found: %u. "
+ "This should not happen so aborting the system.",
+ type);
+ abort();
+ break;
}
stats->m_actual_disk_size += file_size - m_file_size;
@@ -192,8 +200,8 @@ const char *Rdb_tbl_prop_coll::INDEXSTATS_KEY = "__indexstats__";
/*
This function is called by RocksDB to compute properties to store in sst file
*/
-rocksdb::Status
-Rdb_tbl_prop_coll::Finish(rocksdb::UserCollectedProperties *const properties) {
+rocksdb::Status Rdb_tbl_prop_coll::Finish(
+ rocksdb::UserCollectedProperties *const properties) {
uint64_t num_sst_entry_put = 0;
uint64_t num_sst_entry_delete = 0;
uint64_t num_sst_entry_singledelete = 0;
@@ -249,8 +257,8 @@ bool Rdb_tbl_prop_coll::NeedCompact() const {
/*
Returns the same as above, but in human-readable way for logging
*/
-rocksdb::UserCollectedProperties
-Rdb_tbl_prop_coll::GetReadableProperties() const {
+rocksdb::UserCollectedProperties Rdb_tbl_prop_coll::GetReadableProperties()
+ const {
std::string s;
#ifdef DBUG_OFF
s.append("[...");
@@ -321,8 +329,8 @@ void Rdb_tbl_prop_coll::read_stats_from_tbl_props(
/*
Serializes an array of Rdb_index_stats into a network string.
*/
-std::string
-Rdb_index_stats::materialize(const std::vector<Rdb_index_stats> &stats) {
+std::string Rdb_index_stats::materialize(
+ const std::vector<Rdb_index_stats> &stats) {
String ret;
rdb_netstr_append_uint16(&ret, INDEX_STATS_VERSION_ENTRY_TYPES);
for (const auto &i : stats) {
@@ -368,9 +376,10 @@ int Rdb_index_stats::unmaterialize(const std::string &s,
if (version < INDEX_STATS_VERSION_INITIAL ||
version > INDEX_STATS_VERSION_ENTRY_TYPES) {
// NO_LINT_DEBUG
- sql_print_error("Index stats version %d was outside of supported range. "
- "This should not happen so aborting the system.",
- version);
+ sql_print_error(
+ "Index stats version %d was outside of supported range. "
+ "This should not happen so aborting the system.",
+ version);
abort();
}
@@ -399,8 +408,7 @@ int Rdb_index_stats::unmaterialize(const std::string &s,
stats.m_entry_merges = rdb_netbuf_read_uint64(&p);
stats.m_entry_others = rdb_netbuf_read_uint64(&p);
}
- if (p +
- stats.m_distinct_keys_per_prefix.size() *
+ if (p + stats.m_distinct_keys_per_prefix.size() *
sizeof(stats.m_distinct_keys_per_prefix[0]) >
p2) {
return HA_EXIT_FAILURE;
@@ -417,8 +425,8 @@ int Rdb_index_stats::unmaterialize(const std::string &s,
Merges one Rdb_index_stats into another. Can be used to come up with the stats
for the index based on stats for each sst
*/
-void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment,
- const int64_t &estimated_data_len) {
+void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool increment,
+ const int64_t estimated_data_len) {
std::size_t i;
DBUG_ASSERT(estimated_data_len >= 0);
@@ -462,7 +470,7 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool &increment,
}
}
-Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct)
+Rdb_tbl_card_coll::Rdb_tbl_card_coll(const uint8_t table_stats_sampling_pct)
: m_table_stats_sampling_pct(table_stats_sampling_pct),
m_seed(time(nullptr)) {}
@@ -533,4 +541,4 @@ void Rdb_tbl_card_coll::AdjustStats(Rdb_index_stats *stats) {
}
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h
index 36d980d8f53..ce2773cd618 100644
--- a/storage/rocksdb/properties_collector.h
+++ b/storage/rocksdb/properties_collector.h
@@ -54,7 +54,7 @@ struct Rdb_index_stats {
int64_t m_entry_deletes, m_entry_single_deletes;
int64_t m_entry_merges, m_entry_others;
std::vector<int64_t> m_distinct_keys_per_prefix;
- std::string m_name; // name is not persisted
+ std::string m_name; // name is not persisted
static std::string materialize(const std::vector<Rdb_index_stats> &stats);
static int unmaterialize(const std::string &s,
@@ -62,18 +62,23 @@ struct Rdb_index_stats {
Rdb_index_stats() : Rdb_index_stats({0, 0}) {}
explicit Rdb_index_stats(GL_INDEX_ID gl_index_id)
- : m_gl_index_id(gl_index_id), m_data_size(0), m_rows(0),
- m_actual_disk_size(0), m_entry_deletes(0), m_entry_single_deletes(0),
- m_entry_merges(0), m_entry_others(0) {}
-
- void merge(const Rdb_index_stats &s, const bool &increment = true,
- const int64_t &estimated_data_len = 0);
+ : m_gl_index_id(gl_index_id),
+ m_data_size(0),
+ m_rows(0),
+ m_actual_disk_size(0),
+ m_entry_deletes(0),
+ m_entry_single_deletes(0),
+ m_entry_merges(0),
+ m_entry_others(0) {}
+
+ void merge(const Rdb_index_stats &s, const bool increment = true,
+ const int64_t estimated_data_len = 0);
};
// The helper class to calculate index cardinality
class Rdb_tbl_card_coll {
public:
- explicit Rdb_tbl_card_coll(const uint8_t &table_stats_sampling_pct);
+ explicit Rdb_tbl_card_coll(const uint8_t table_stats_sampling_pct);
public:
void ProcessKey(const rocksdb::Slice &key, const Rdb_key_def *keydef,
@@ -105,10 +110,10 @@ class Rdb_tbl_card_coll {
};
class Rdb_tbl_prop_coll : public rocksdb::TablePropertiesCollector {
-public:
+ public:
Rdb_tbl_prop_coll(Rdb_ddl_manager *const ddl_manager,
- const Rdb_compact_params &params, const uint32_t &cf_id,
- const uint8_t &table_stats_sampling_pct);
+ const Rdb_compact_params &params, const uint32_t cf_id,
+ const uint8_t table_stats_sampling_pct);
/*
Override parent class's virtual methods of interest.
@@ -120,8 +125,8 @@ public:
rocksdb::SequenceNumber seq,
uint64_t file_size) override;
- virtual rocksdb::Status
- Finish(rocksdb::UserCollectedProperties *properties) override;
+ virtual rocksdb::Status Finish(
+ rocksdb::UserCollectedProperties *properties) override;
virtual const char *Name() const override { return "Rdb_tbl_prop_coll"; }
@@ -129,25 +134,25 @@ public:
bool NeedCompact() const override;
-public:
+ public:
uint64_t GetMaxDeletedRows() const { return m_max_deleted_rows; }
static void read_stats_from_tbl_props(
const std::shared_ptr<const rocksdb::TableProperties> &table_props,
std::vector<Rdb_index_stats> *out_stats_vector);
-private:
+ private:
static std::string GetReadableStats(const Rdb_index_stats &it);
bool ShouldCollectStats();
void CollectStatsForRow(const rocksdb::Slice &key,
const rocksdb::Slice &value,
const rocksdb::EntryType &type,
- const uint64_t &file_size);
+ const uint64_t file_size);
Rdb_index_stats *AccessStats(const rocksdb::Slice &key);
void AdjustDeletedRows(rocksdb::EntryType type);
-private:
+ private:
uint32_t m_cf_id;
std::shared_ptr<const Rdb_key_def> m_keydef;
Rdb_ddl_manager *m_ddl_manager;
@@ -169,10 +174,10 @@ private:
class Rdb_tbl_prop_coll_factory
: public rocksdb::TablePropertiesCollectorFactory {
-public:
+ public:
Rdb_tbl_prop_coll_factory(const Rdb_tbl_prop_coll_factory &) = delete;
- Rdb_tbl_prop_coll_factory &
- operator=(const Rdb_tbl_prop_coll_factory &) = delete;
+ Rdb_tbl_prop_coll_factory &operator=(const Rdb_tbl_prop_coll_factory &) =
+ delete;
explicit Rdb_tbl_prop_coll_factory(Rdb_ddl_manager *ddl_manager)
: m_ddl_manager(ddl_manager) {}
@@ -192,19 +197,19 @@ public:
return "Rdb_tbl_prop_coll_factory";
}
-public:
+ public:
void SetCompactionParams(const Rdb_compact_params &params) {
m_params = params;
}
- void SetTableStatsSamplingPct(const uint8_t &table_stats_sampling_pct) {
+ void SetTableStatsSamplingPct(const uint8_t table_stats_sampling_pct) {
m_table_stats_sampling_pct = table_stats_sampling_pct;
}
-private:
+ private:
Rdb_ddl_manager *const m_ddl_manager;
Rdb_compact_params m_params;
uint8_t m_table_stats_sampling_pct;
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h
index 23645324470..639688db951 100644
--- a/storage/rocksdb/rdb_buff.h
+++ b/storage/rocksdb/rdb_buff.h
@@ -40,6 +40,17 @@
#define be16toh(x) OSSwapBigToHostInt16(x)
#endif
+/* MySQL header files */
+#include "./my_global.h"
+
+/* MyRocks header files */
+#include "./rdb_global.h"
+#include "./rdb_utils.h"
+
+/* RocksDB header files */
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
namespace myrocks {
/*
@@ -48,7 +59,7 @@ namespace myrocks {
*/
inline void rdb_netstr_append_uint64(my_core::String *const out_netstr,
- const uint64 &val) {
+ const uint64 val) {
DBUG_ASSERT(out_netstr != nullptr);
// Convert from host machine byte order (usually Little Endian) to network
@@ -58,7 +69,7 @@ inline void rdb_netstr_append_uint64(my_core::String *const out_netstr,
}
inline void rdb_netstr_append_uint32(my_core::String *const out_netstr,
- const uint32 &val) {
+ const uint32 val) {
DBUG_ASSERT(out_netstr != nullptr);
// Convert from host machine byte order (usually Little Endian) to network
@@ -68,7 +79,7 @@ inline void rdb_netstr_append_uint32(my_core::String *const out_netstr,
}
inline void rdb_netstr_append_uint16(my_core::String *const out_netstr,
- const uint16 &val) {
+ const uint16 val) {
DBUG_ASSERT(out_netstr != nullptr);
// Convert from host machine byte order (usually Little Endian) to network
@@ -81,7 +92,7 @@ inline void rdb_netstr_append_uint16(my_core::String *const out_netstr,
Basic network buffer ("netbuf") write helper functions.
*/
-inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 &n) {
+inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 n) {
DBUG_ASSERT(dst_netbuf != nullptr);
// Convert from host byte order (usually Little Endian) to network byte order
@@ -90,7 +101,7 @@ inline void rdb_netbuf_store_uint64(uchar *const dst_netbuf, const uint64 &n) {
memcpy(dst_netbuf, &net_val, sizeof(net_val));
}
-inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 &n) {
+inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 n) {
DBUG_ASSERT(dst_netbuf != nullptr);
// Convert from host byte order (usually Little Endian) to network byte order
@@ -99,7 +110,7 @@ inline void rdb_netbuf_store_uint32(uchar *const dst_netbuf, const uint32 &n) {
memcpy(dst_netbuf, &net_val, sizeof(net_val));
}
-inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 &n) {
+inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 n) {
DBUG_ASSERT(dst_netbuf != nullptr);
// Convert from host byte order (usually Little Endian) to network byte order
@@ -108,14 +119,14 @@ inline void rdb_netbuf_store_uint16(uchar *const dst_netbuf, const uint16 &n) {
memcpy(dst_netbuf, &net_val, sizeof(net_val));
}
-inline void rdb_netbuf_store_byte(uchar *const dst_netbuf, const uchar &c) {
+inline void rdb_netbuf_store_byte(uchar *const dst_netbuf, const uchar c) {
DBUG_ASSERT(dst_netbuf != nullptr);
*dst_netbuf = c;
}
inline void rdb_netbuf_store_index(uchar *const dst_netbuf,
- const uint32 &number) {
+ const uint32 number) {
DBUG_ASSERT(dst_netbuf != nullptr);
rdb_netbuf_store_uint32(dst_netbuf, number);
@@ -230,10 +241,10 @@ class Rdb_string_reader {
const char *m_ptr;
uint m_len;
-private:
+ private:
Rdb_string_reader &operator=(const Rdb_string_reader &) = default;
-public:
+ public:
Rdb_string_reader(const Rdb_string_reader &) = default;
/* named constructor */
static Rdb_string_reader read_or_empty(const rocksdb::Slice *const slice) {
@@ -268,7 +279,7 @@ public:
Read the next @param size bytes. Returns pointer to the bytes read, or
nullptr if the remaining string doesn't have that many bytes.
*/
- const char *read(const uint &size) {
+ const char *read(const uint size) {
const char *res;
if (m_len < size) {
res = nullptr;
@@ -282,21 +293,21 @@ public:
bool read_uint8(uint *const res) {
const uchar *p;
- if (!(p = reinterpret_cast<const uchar *>(read(1))))
- return true; // error
- else {
+ if (!(p = reinterpret_cast<const uchar *>(read(1)))) {
+ return true; // error
+ } else {
*res = *p;
- return false; // Ok
+ return false; // Ok
}
}
bool read_uint16(uint *const res) {
const uchar *p;
- if (!(p = reinterpret_cast<const uchar *>(read(2))))
- return true; // error
- else {
+ if (!(p = reinterpret_cast<const uchar *>(read(2)))) {
+ return true; // error
+ } else {
*res = rdb_netbuf_to_uint16(p);
- return false; // Ok
+ return false; // Ok
}
}
@@ -338,29 +349,29 @@ public:
class Rdb_string_writer {
std::vector<uchar> m_data;
-public:
+ public:
Rdb_string_writer(const Rdb_string_writer &) = delete;
Rdb_string_writer &operator=(const Rdb_string_writer &) = delete;
Rdb_string_writer() = default;
void clear() { m_data.clear(); }
- void write_uint8(const uint &val) {
+ void write_uint8(const uint val) {
m_data.push_back(static_cast<uchar>(val));
}
- void write_uint16(const uint &val) {
+ void write_uint16(const uint val) {
const auto size = m_data.size();
m_data.resize(size + 2);
rdb_netbuf_store_uint16(m_data.data() + size, val);
}
- void write_uint32(const uint &val) {
+ void write_uint32(const uint val) {
const auto size = m_data.size();
m_data.resize(size + 4);
rdb_netbuf_store_uint32(m_data.data() + size, val);
}
- void write(const uchar *const new_data, const size_t &len) {
+ void write(const uchar *const new_data, const size_t len) {
DBUG_ASSERT(new_data != nullptr);
m_data.insert(m_data.end(), new_data, new_data + len);
}
@@ -368,24 +379,24 @@ public:
uchar *ptr() { return m_data.data(); }
size_t get_current_pos() const { return m_data.size(); }
- void write_uint8_at(const size_t &pos, const uint &new_val) {
+ void write_uint8_at(const size_t pos, const uint new_val) {
// This function will only overwrite what was written
DBUG_ASSERT(pos < get_current_pos());
m_data.data()[pos] = new_val;
}
- void write_uint16_at(const size_t &pos, const uint &new_val) {
+ void write_uint16_at(const size_t pos, const uint new_val) {
// This function will only overwrite what was written
DBUG_ASSERT(pos < get_current_pos() && (pos + 1) < get_current_pos());
rdb_netbuf_store_uint16(m_data.data() + pos, new_val);
}
- void truncate(const size_t &pos) {
+ void truncate(const size_t pos) {
DBUG_ASSERT(pos < m_data.size());
m_data.resize(pos);
}
- void allocate(const size_t &len, const uchar &val = 0) {
+ void allocate(const size_t len, const uchar val = 0) {
DBUG_ASSERT(len > 0);
m_data.resize(m_data.size() + len, val);
}
@@ -407,14 +418,14 @@ class Rdb_bit_writer {
Rdb_string_writer *m_writer;
uchar m_offset;
-public:
+ public:
Rdb_bit_writer(const Rdb_bit_writer &) = delete;
Rdb_bit_writer &operator=(const Rdb_bit_writer &) = delete;
explicit Rdb_bit_writer(Rdb_string_writer *writer_arg)
: m_writer(writer_arg), m_offset(0) {}
- void write(uint size, const uint &value) {
+ void write(uint size, const uint value) {
DBUG_ASSERT((value & ((1 << size) - 1)) == value);
while (size > 0) {
@@ -439,7 +450,7 @@ class Rdb_bit_reader {
uint m_ret;
Rdb_string_reader *const m_reader;
-public:
+ public:
Rdb_bit_reader(const Rdb_bit_reader &) = delete;
Rdb_bit_reader &operator=(const Rdb_bit_reader &) = delete;
@@ -472,4 +483,67 @@ public:
}
};
-} // namespace myrocks
+template <size_t buf_length>
+class Rdb_buf_writer {
+ public:
+ Rdb_buf_writer(const Rdb_buf_writer &) = delete;
+ Rdb_buf_writer &operator=(const Rdb_buf_writer &) = delete;
+ Rdb_buf_writer() { reset(); }
+
+ void write_uint32(const uint32 n) {
+ DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_uint32(m_ptr, n);
+ m_ptr += sizeof(n);
+ }
+
+ void write_uint64(const uint64 n) {
+ DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_uint64(m_ptr, n);
+ m_ptr += sizeof(n);
+ }
+
+ void write_uint16(const uint16 n) {
+ DBUG_ASSERT(m_ptr + sizeof(n) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_uint16(m_ptr, n);
+ m_ptr += sizeof(n);
+ }
+
+ void write_byte(const uchar c) {
+ DBUG_ASSERT(m_ptr + sizeof(c) <= m_buf.data() + buf_length);
+ rdb_netbuf_store_byte(m_ptr, c);
+ m_ptr += sizeof(c);
+ }
+
+ void write_index(const uint32 n) { write_uint32(n); }
+
+ void write(const char *buf, const size_t size) {
+ DBUG_ASSERT(m_ptr + size <= m_buf.data() + buf_length);
+ memcpy(m_ptr, buf, size);
+ m_ptr += size;
+ }
+
+ void write(const uchar *buf, const size_t size) {
+ DBUG_ASSERT(m_ptr + size <= m_buf.data() + buf_length);
+ memcpy(m_ptr, buf, size);
+ m_ptr += size;
+ }
+
+ void reset() { m_ptr = m_buf.data(); }
+
+ const char *data() const {
+ return reinterpret_cast<const char *>(m_buf.data());
+ }
+
+ size_t capacity() { return buf_length; }
+
+ /** Returns actual size of the buffer that has data */
+ size_t size() { return m_ptr - m_buf.data(); }
+
+ rocksdb::Slice to_slice() { return rocksdb::Slice(data(), size()); }
+
+ private:
+ std::array<uchar, buf_length> m_buf;
+ uchar *m_ptr;
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_manager.cc b/storage/rocksdb/rdb_cf_manager.cc
index 777c937797b..c4461346f32 100644
--- a/storage/rocksdb/rdb_cf_manager.cc
+++ b/storage/rocksdb/rdb_cf_manager.cc
@@ -15,7 +15,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
+#pragma implementation // gcc: Class implementation
#endif
/* For use of 'PRIu64': */
@@ -31,8 +31,11 @@
/* MyRocks header files */
#include "./ha_rocksdb.h"
#include "./ha_rocksdb_proto.h"
+#include "./rdb_datadic.h"
#include "./rdb_psi.h"
+#include <string>
+
namespace myrocks {
/* Check if ColumnFamily name says it's a reverse-ordered CF */
@@ -43,7 +46,7 @@ bool Rdb_cf_manager::is_cf_name_reverse(const char *const name) {
}
void Rdb_cf_manager::init(
- std::unique_ptr<Rdb_cf_options> cf_options,
+ std::unique_ptr<Rdb_cf_options> &&cf_options,
std::vector<rocksdb::ColumnFamilyHandle *> *const handles) {
mysql_mutex_init(rdb_cfm_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST);
@@ -75,9 +78,8 @@ void Rdb_cf_manager::cleanup() {
@detail
See Rdb_cf_manager::get_cf
*/
-rocksdb::ColumnFamilyHandle *
-Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb,
- const std::string &cf_name_arg) {
+rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_or_create_cf(
+ rocksdb::DB *const rdb, const std::string &cf_name_arg) {
DBUG_ASSERT(rdb != nullptr);
rocksdb::ColumnFamilyHandle *cf_handle = nullptr;
@@ -105,7 +107,10 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb,
// NO_LINT_DEBUG
sql_print_information("RocksDB: creating a column family %s",
cf_name.c_str());
+ // NO_LINT_DEBUG
sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size);
+
+ // NO_LINT_DEBUG
sql_print_information(" target_file_size_base=%" PRIu64,
opts.target_file_size_base);
@@ -129,12 +134,13 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb,
Find column family by its cf_name.
*/
-rocksdb::ColumnFamilyHandle *
-Rdb_cf_manager::get_cf(const std::string &cf_name_arg) const {
+rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(
+ const std::string &cf_name_arg, const bool lock_held_by_caller) const {
rocksdb::ColumnFamilyHandle *cf_handle;
- RDB_MUTEX_LOCK_CHECK(m_mutex);
-
+ if (!lock_held_by_caller) {
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ }
std::string cf_name = cf_name_arg.empty() ? DEFAULT_CF_NAME : cf_name_arg;
const auto it = m_cf_name_map.find(cf_name);
@@ -145,18 +151,19 @@ Rdb_cf_manager::get_cf(const std::string &cf_name_arg) const {
sql_print_warning("Column family '%s' not found.", cf_name.c_str());
}
- RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ if (!lock_held_by_caller) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ }
return cf_handle;
}
-rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(const uint32_t &id) const {
+rocksdb::ColumnFamilyHandle *Rdb_cf_manager::get_cf(const uint32_t id) const {
rocksdb::ColumnFamilyHandle *cf_handle = nullptr;
RDB_MUTEX_LOCK_CHECK(m_mutex);
const auto it = m_cf_id_map.find(id);
- if (it != m_cf_id_map.end())
- cf_handle = it->second;
+ if (it != m_cf_id_map.end()) cf_handle = it->second;
RDB_MUTEX_UNLOCK_CHECK(m_mutex);
return cf_handle;
@@ -174,8 +181,8 @@ std::vector<std::string> Rdb_cf_manager::get_cf_names(void) const {
return names;
}
-std::vector<rocksdb::ColumnFamilyHandle *>
-Rdb_cf_manager::get_all_cf(void) const {
+std::vector<rocksdb::ColumnFamilyHandle *> Rdb_cf_manager::get_all_cf(
+ void) const {
std::vector<rocksdb::ColumnFamilyHandle *> list;
RDB_MUTEX_LOCK_CHECK(m_mutex);
@@ -190,4 +197,77 @@ Rdb_cf_manager::get_all_cf(void) const {
return list;
}
-} // namespace myrocks
+struct Rdb_cf_scanner : public Rdb_tables_scanner {
+ uint32_t m_cf_id;
+ int m_is_cf_used;
+
+ explicit Rdb_cf_scanner(uint32_t cf_id)
+ : m_cf_id(cf_id), m_is_cf_used(false) {}
+
+ int add_table(Rdb_tbl_def *tdef) override {
+ DBUG_ASSERT(tdef != nullptr);
+
+ for (uint i = 0; i < tdef->m_key_count; i++) {
+ const Rdb_key_def &kd = *tdef->m_key_descr_arr[i];
+
+ if (kd.get_cf()->GetID() == m_cf_id) {
+ m_is_cf_used = true;
+ return HA_EXIT_SUCCESS;
+ }
+ }
+ return HA_EXIT_SUCCESS;
+ }
+};
+
+int Rdb_cf_manager::drop_cf(const std::string &cf_name) {
+ auto ddl_manager = rdb_get_ddl_manager();
+ uint32_t cf_id = 0;
+
+ if (cf_name == DEFAULT_SYSTEM_CF_NAME) {
+ return HA_EXIT_FAILURE;
+ }
+
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
+ auto cf_handle = get_cf(cf_name, true /* lock_held_by_caller */);
+ if (cf_handle == nullptr) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return HA_EXIT_SUCCESS;
+ }
+
+ cf_id = cf_handle->GetID();
+ Rdb_cf_scanner scanner(cf_id);
+
+ auto ret = ddl_manager->scan_for_tables(&scanner);
+ if (ret) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return ret;
+ }
+
+ if (scanner.m_is_cf_used) {
+ // column family is used by existing key
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return HA_EXIT_FAILURE;
+ }
+
+ auto rdb = rdb_get_rocksdb_db();
+ auto status = rdb->DropColumnFamily(cf_handle);
+ if (!status.ok()) {
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+ return ha_rocksdb::rdb_error_to_mysql(status);
+ }
+
+ delete cf_handle;
+
+ auto id_iter = m_cf_id_map.find(cf_id);
+ DBUG_ASSERT(id_iter != m_cf_id_map.end());
+ m_cf_id_map.erase(id_iter);
+
+ auto name_iter = m_cf_name_map.find(cf_name);
+ DBUG_ASSERT(name_iter != m_cf_name_map.end());
+ m_cf_name_map.erase(name_iter);
+
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex);
+
+ return HA_EXIT_SUCCESS;
+}
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_manager.h b/storage/rocksdb/rdb_cf_manager.h
index 943b6f07c2b..cf7b3d6cfb8 100644
--- a/storage/rocksdb/rdb_cf_manager.h
+++ b/storage/rocksdb/rdb_cf_manager.h
@@ -66,7 +66,7 @@ class Rdb_cf_manager {
column
families that are present in the database. The first CF is the default CF.
*/
- void init(std::unique_ptr<Rdb_cf_options> cf_options,
+ void init(std::unique_ptr<Rdb_cf_options> &&cf_options,
std::vector<rocksdb::ColumnFamilyHandle *> *const handles);
void cleanup();
@@ -78,10 +78,11 @@ class Rdb_cf_manager {
const std::string &cf_name);
/* Used by table open */
- rocksdb::ColumnFamilyHandle *get_cf(const std::string &cf_name) const;
+ rocksdb::ColumnFamilyHandle *get_cf(
+ const std::string &cf_name, const bool lock_held_by_caller = false) const;
/* Look up cf by id; used by datadic */
- rocksdb::ColumnFamilyHandle *get_cf(const uint32_t &id) const;
+ rocksdb::ColumnFamilyHandle *get_cf(const uint32_t id) const;
/* Used to iterate over column families for show status */
std::vector<std::string> get_cf_names(void) const;
@@ -89,7 +90,8 @@ class Rdb_cf_manager {
/* Used to iterate over column families */
std::vector<rocksdb::ColumnFamilyHandle *> get_all_cf(void) const;
- // void drop_cf(); -- not implemented so far.
+ /* Used to delete cf by name */
+ int drop_cf(const std::string &cf_name);
void get_cf_options(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts)
@@ -103,4 +105,4 @@ class Rdb_cf_manager {
}
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc
index eaa3d07be4b..4f12a998e65 100644
--- a/storage/rocksdb/rdb_cf_options.cc
+++ b/storage/rocksdb/rdb_cf_options.cc
@@ -15,7 +15,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
+#pragma implementation // gcc: Class implementation
#endif
#include <my_config.h>
@@ -99,10 +99,10 @@ void Rdb_cf_options::update(const std::string &cf_name,
bool Rdb_cf_options::set_default(const std::string &default_config) {
rocksdb::ColumnFamilyOptions options;
- if (!default_config.empty() &&
- !rocksdb::GetColumnFamilyOptionsFromString(options, default_config,
- &options)
- .ok()) {
+ if (!default_config.empty() && !rocksdb::GetColumnFamilyOptionsFromString(
+ options, default_config, &options)
+ .ok()) {
+ // NO_LINT_DEBUG
fprintf(stderr, "Invalid default column family config: %s\n",
default_config.c_str());
return false;
@@ -116,8 +116,7 @@ bool Rdb_cf_options::set_default(const std::string &default_config) {
void Rdb_cf_options::skip_spaces(const std::string &input, size_t *const pos) {
DBUG_ASSERT(pos != nullptr);
- while (*pos < input.size() && isspace(input[*pos]))
- ++(*pos);
+ while (*pos < input.size() && isspace(input[*pos])) ++(*pos);
}
// Find a valid column family name. Note that all characters except a
@@ -135,8 +134,7 @@ bool Rdb_cf_options::find_column_family(const std::string &input,
// Loop through the characters in the string until we see a '='.
for (; *pos < input.size() && input[*pos] != '='; ++(*pos)) {
// If this is not a space, move the end position to the current position.
- if (input[*pos] != ' ')
- end_pos = *pos;
+ if (input[*pos] != ' ') end_pos = *pos;
}
if (end_pos == beg_pos - 1) {
@@ -177,24 +175,24 @@ bool Rdb_cf_options::find_options(const std::string &input, size_t *const pos,
// number of closing curly braces.
while (*pos < input.size()) {
switch (input[*pos]) {
- case '}':
- // If this is a closing curly brace and we bring the count down to zero
- // we can exit the loop with a valid options string.
- if (--brace_count == 0) {
- *options = input.substr(beg_pos, *pos - beg_pos);
- ++(*pos); // Move past the last closing curly brace
- return true;
- }
-
- break;
-
- case '{':
- // If this is an open curly brace increment the count.
- ++brace_count;
- break;
-
- default:
- break;
+ case '}':
+ // If this is a closing curly brace and we bring the count down to zero
+ // we can exit the loop with a valid options string.
+ if (--brace_count == 0) {
+ *options = input.substr(beg_pos, *pos - beg_pos);
+ ++(*pos); // Move past the last closing curly brace
+ return true;
+ }
+
+ break;
+
+ case '{':
+ // If this is an open curly brace increment the count.
+ ++brace_count;
+ break;
+
+ default:
+ break;
}
// Move to the next character.
@@ -221,8 +219,7 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string &input,
skip_spaces(input, pos);
// We should now have a column family name.
- if (!find_column_family(input, pos, cf))
- return false;
+ if (!find_column_family(input, pos, cf)) return false;
// If we are at the end of the input then we generate an error.
if (*pos == input.size()) {
@@ -238,8 +235,7 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string &input,
// Find the options for this column family. This should be in the format
// {<options>} where <options> may contain embedded pairs of curly braces.
- if (!find_options(input, pos, opt_str))
- return false;
+ if (!find_options(input, pos, opt_str)) return false;
// Skip any trailing spaces after the option string.
skip_spaces(input, pos);
@@ -260,7 +256,7 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string &input,
}
bool Rdb_cf_options::parse_cf_options(const std::string &cf_options,
- Name_to_config_t *option_map) {
+ Name_to_config_t *option_map) {
std::string cf;
std::string opt_str;
rocksdb::ColumnFamilyOptions options;
@@ -316,8 +312,8 @@ bool Rdb_cf_options::set_override(const std::string &override_config) {
return true;
}
-const rocksdb::Comparator *
-Rdb_cf_options::get_cf_comparator(const std::string &cf_name) {
+const rocksdb::Comparator *Rdb_cf_options::get_cf_comparator(
+ const std::string &cf_name) {
if (Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str())) {
return &s_rev_pk_comparator;
} else {
@@ -325,8 +321,8 @@ Rdb_cf_options::get_cf_comparator(const std::string &cf_name) {
}
}
-std::shared_ptr<rocksdb::MergeOperator>
-Rdb_cf_options::get_cf_merge_operator(const std::string &cf_name) {
+std::shared_ptr<rocksdb::MergeOperator> Rdb_cf_options::get_cf_merge_operator(
+ const std::string &cf_name) {
return (cf_name == DEFAULT_SYSTEM_CF_NAME)
? std::make_shared<Rdb_system_merge_op>()
: nullptr;
@@ -342,4 +338,4 @@ void Rdb_cf_options::get_cf_options(const std::string &cf_name,
opts->merge_operator = get_cf_merge_operator(cf_name);
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h
index 349f7c42e32..360356f7af1 100644
--- a/storage/rocksdb/rdb_cf_options.h
+++ b/storage/rocksdb/rdb_cf_options.h
@@ -39,7 +39,7 @@ namespace myrocks {
families not found in the map.
*/
class Rdb_cf_options {
-public:
+ public:
using Name_to_config_t = std::unordered_map<std::string, std::string>;
Rdb_cf_options(const Rdb_cf_options &) = delete;
@@ -61,20 +61,20 @@ public:
return m_default_cf_opts;
}
- static const rocksdb::Comparator *
- get_cf_comparator(const std::string &cf_name);
+ static const rocksdb::Comparator *get_cf_comparator(
+ const std::string &cf_name);
- std::shared_ptr<rocksdb::MergeOperator>
- get_cf_merge_operator(const std::string &cf_name);
+ std::shared_ptr<rocksdb::MergeOperator> get_cf_merge_operator(
+ const std::string &cf_name);
void get_cf_options(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts)
MY_ATTRIBUTE((__nonnull__));
static bool parse_cf_options(const std::string &cf_options,
- Name_to_config_t *option_map);
+ Name_to_config_t *option_map);
-private:
+ private:
bool set_default(const std::string &default_config);
bool set_override(const std::string &overide_config);
@@ -88,7 +88,7 @@ private:
std::string *const cf,
std::string *const opt_str);
-private:
+ private:
static Rdb_pk_comparator s_pk_comparator;
static Rdb_rev_comparator s_rev_pk_comparator;
@@ -101,4 +101,4 @@ private:
rocksdb::ColumnFamilyOptions m_default_cf_opts;
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h
index ecc78de91bf..1cd27273b56 100644
--- a/storage/rocksdb/rdb_compact_filter.h
+++ b/storage/rocksdb/rdb_compact_filter.h
@@ -17,12 +17,12 @@
#pragma once
#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
+#pragma implementation // gcc: Class implementation
#endif
/* C++ system header files */
-#include <string>
#include <time.h>
+#include <string>
#include <ctime>
/* RocksDB includes */
@@ -35,7 +35,7 @@
namespace myrocks {
class Rdb_compact_filter : public rocksdb::CompactionFilter {
-public:
+ public:
Rdb_compact_filter(const Rdb_compact_filter &) = delete;
Rdb_compact_filter &operator=(const Rdb_compact_filter &) = delete;
@@ -80,7 +80,7 @@ public:
m_snapshot_timestamp = static_cast<uint64_t>(std::time(nullptr));
}
-#ifndef NDEBUG
+#ifndef DBUG_OFF
int snapshot_ts = rdb_dbug_set_ttl_snapshot_ts();
if (snapshot_ts) {
m_snapshot_timestamp =
@@ -134,12 +134,13 @@ public:
struct Rdb_index_info index_info;
if (!rdb_get_dict_manager()->get_index_info(gl_index_id, &index_info)) {
// NO_LINT_DEBUG
- sql_print_error("RocksDB: Could not get index information "
- "for Index Number (%u,%u)",
- gl_index_id.cf_id, gl_index_id.index_id);
+ sql_print_error(
+ "RocksDB: Could not get index information "
+ "for Index Number (%u,%u)",
+ gl_index_id.cf_id, gl_index_id.index_id);
}
-#ifndef NDEBUG
+#ifndef DBUG_OFF
if (rdb_dbug_set_ttl_ignore_pk() &&
index_info.m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY) {
*ttl_duration = 0;
@@ -164,9 +165,10 @@ public:
buf = rdb_hexdump(existing_value.data(), existing_value.size(),
RDB_MAX_HEXDUMP_LEN);
// NO_LINT_DEBUG
- sql_print_error("Decoding ttl from PK value failed in compaction filter, "
- "for index (%u,%u), val: %s",
- m_prev_index.cf_id, m_prev_index.index_id, buf.c_str());
+ sql_print_error(
+ "Decoding ttl from PK value failed in compaction filter, "
+ "for index (%u,%u), val: %s",
+ m_prev_index.cf_id, m_prev_index.index_id, buf.c_str());
abort();
}
@@ -198,10 +200,10 @@ public:
};
class Rdb_compact_filter_factory : public rocksdb::CompactionFilterFactory {
-public:
+ public:
Rdb_compact_filter_factory(const Rdb_compact_filter_factory &) = delete;
- Rdb_compact_filter_factory &
- operator=(const Rdb_compact_filter_factory &) = delete;
+ Rdb_compact_filter_factory &operator=(const Rdb_compact_filter_factory &) =
+ delete;
Rdb_compact_filter_factory() {}
~Rdb_compact_filter_factory() {}
@@ -215,4 +217,4 @@ public:
}
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_comparator.h b/storage/rocksdb/rdb_comparator.h
index b43118eda36..9cb25925159 100644
--- a/storage/rocksdb/rdb_comparator.h
+++ b/storage/rocksdb/rdb_comparator.h
@@ -36,7 +36,7 @@ namespace myrocks {
Rdb_key_def)
*/
class Rdb_pk_comparator : public rocksdb::Comparator {
-public:
+ public:
Rdb_pk_comparator(const Rdb_pk_comparator &) = delete;
Rdb_pk_comparator &operator=(const Rdb_pk_comparator &) = delete;
Rdb_pk_comparator() = default;
@@ -63,7 +63,7 @@ public:
};
class Rdb_rev_comparator : public rocksdb::Comparator {
-public:
+ public:
Rdb_rev_comparator(const Rdb_rev_comparator &) = delete;
Rdb_rev_comparator &operator=(const Rdb_rev_comparator &) = delete;
Rdb_rev_comparator() = default;
@@ -82,4 +82,4 @@ public:
}
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_converter.cc b/storage/rocksdb/rdb_converter.cc
new file mode 100644
index 00000000000..c558f428652
--- /dev/null
+++ b/storage/rocksdb/rdb_converter.cc
@@ -0,0 +1,838 @@
+/*
+ Copyright (c) 2015, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <my_config.h>
+
+/* This C++ file's header file */
+#include "./rdb_converter.h"
+
+/* Standard C++ header files */
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./field.h"
+#include "./key.h"
+#include "./m_ctype.h"
+#include "./my_bit.h"
+#include "./my_bitmap.h"
+#include "./sql_table.h"
+
+
+/* MyRocks header files */
+#include "./ha_rocksdb.h"
+#include "./ha_rocksdb_proto.h"
+#include "./my_stacktrace.h"
+#include "./rdb_cf_manager.h"
+#include "./rdb_psi.h"
+#include "./rdb_utils.h"
+
+
+namespace myrocks {
+
+void dbug_modify_key_varchar8(String *on_disk_rec) {
+ std::string res;
+ // The key starts with index number
+ res.append(on_disk_rec->ptr(), Rdb_key_def::INDEX_NUMBER_SIZE);
+
+ // Then, a mem-comparable form of a varchar(8) value.
+ res.append("ABCDE\0\0\0\xFC", 9);
+ on_disk_rec->length(0);
+ on_disk_rec->append(res.data(), res.size());
+}
+
+/*
+ Convert field from rocksdb storage format into Mysql Record format
+ @param buf OUT start memory to fill converted data
+ @param offset IN/OUT decoded data is stored in buf + offset
+ @param table IN current table
+ @param field IN current field
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode(uchar *const buf, uint *offset,
+ TABLE *table,
+ my_core::Field *field,
+ Rdb_field_encoder *field_dec,
+ Rdb_string_reader *reader,
+ bool decode, bool is_null) {
+ int err = HA_EXIT_SUCCESS;
+
+ uint field_offset = field->ptr - table->record[0];
+ *offset = field_offset;
+ uint null_offset = field->null_offset();
+ bool maybe_null = field->real_maybe_null();
+ field->move_field(buf + field_offset,
+ maybe_null ? buf + null_offset : nullptr, field->null_bit);
+
+ if (is_null) {
+ if (decode) {
+ // This sets the NULL-bit of this record
+ field->set_null();
+ /*
+ Besides that, set the field value to default value. CHECKSUM TABLE
+ depends on this.
+ */
+ memcpy(field->ptr, table->s->default_values + field_offset,
+ field->pack_length());
+ }
+ } else {
+ if (decode) {
+ // sets non-null bits for this record
+ field->set_notnull();
+ }
+
+ if (field_dec->m_field_type == MYSQL_TYPE_BLOB) {
+ err = decode_blob(table, field, reader, decode);
+ } else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) {
+ err = decode_varchar(field, reader, decode);
+ } else {
+ err = decode_fixed_length_field(field, field_dec, reader, decode);
+ }
+ }
+
+ // Restore field->ptr and field->null_ptr
+ field->move_field(table->record[0] + field_offset,
+ maybe_null ? table->record[0] + null_offset : nullptr,
+ field->null_bit);
+
+ return err;
+}
+
+/*
+ Convert blob from rocksdb storage format into Mysql Record format
+ @param table IN current table
+ @param field IN current field
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode_blob(TABLE *table, Field *field,
+ Rdb_string_reader *reader,
+ bool decode) {
+ my_core::Field_blob *blob = (my_core::Field_blob *)field;
+
+ // Get the number of bytes needed to store length
+ const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr;
+
+ const char *data_len_str;
+ if (!(data_len_str = reader->read(length_bytes))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ memcpy(blob->ptr, data_len_str, length_bytes);
+ uint32 data_len =
+ blob->get_length(reinterpret_cast<const uchar *>(data_len_str),
+ length_bytes);
+ const char *blob_ptr;
+ if (!(blob_ptr = reader->read(data_len))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (decode) {
+ // set 8-byte pointer to 0, like innodb does (relevant for 32-bit
+ // platforms)
+ memset(blob->ptr + length_bytes, 0, 8);
+ memcpy(blob->ptr + length_bytes, &blob_ptr, sizeof(uchar **));
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Convert fixed length field from rocksdb storage format into Mysql Record
+ format
+ @param field IN current field
+ @param field_dec IN data structure containing field encoding data
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode_fixed_length_field(
+ my_core::Field *const field, Rdb_field_encoder *field_dec,
+ Rdb_string_reader *const reader, bool decode) {
+ uint len = field_dec->m_pack_length_in_rec;
+ if (len > 0) {
+ const char *data_bytes;
+ if ((data_bytes = reader->read(len)) == nullptr) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (decode) {
+ memcpy(field->ptr, data_bytes, len);
+ }
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Convert varchar field from rocksdb storage format into Mysql Record format
+ @param field IN current field
+ @param field_dec IN data structure containing field encoding data
+ @param reader IN rocksdb value slice reader
+ @param decode IN whether to decode current field
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_convert_to_record_value_decoder::decode_varchar(
+ Field *field, Rdb_string_reader *const reader, bool decode) {
+ my_core::Field_varstring *const field_var = (my_core::Field_varstring *)field;
+
+ const char *data_len_str;
+ if (!(data_len_str = reader->read(field_var->length_bytes))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ uint data_len;
+ // field_var->length_bytes is 1 or 2
+ if (field_var->length_bytes == 1) {
+ data_len = (uchar)data_len_str[0];
+ } else {
+ DBUG_ASSERT(field_var->length_bytes == 2);
+ data_len = uint2korr(data_len_str);
+ }
+
+ if (data_len > field_var->field_length) {
+ // The data on disk is longer than table DDL allows?
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (!reader->read(data_len)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (decode) {
+ memcpy(field_var->ptr, data_len_str, field_var->length_bytes + data_len);
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+template <typename value_field_decoder>
+Rdb_value_field_iterator<value_field_decoder>::Rdb_value_field_iterator(
+ TABLE *table, Rdb_string_reader *value_slice_reader,
+ const Rdb_converter *rdb_converter, uchar *const buf)
+ : m_buf(buf) {
+ DBUG_ASSERT(table != nullptr);
+ DBUG_ASSERT(buf != nullptr);
+
+ m_table = table;
+ m_value_slice_reader = value_slice_reader;
+ auto fields = rdb_converter->get_decode_fields();
+ m_field_iter = fields->begin();
+ m_field_end = fields->end();
+ m_null_bytes = rdb_converter->get_null_bytes();
+ m_offset = 0;
+}
+
+// Iterate each requested field and decode one by one
+template <typename value_field_decoder>
+int Rdb_value_field_iterator<value_field_decoder>::next() {
+ int err = HA_EXIT_SUCCESS;
+ while (m_field_iter != m_field_end) {
+ m_field_dec = m_field_iter->m_field_enc;
+ bool decode = m_field_iter->m_decode;
+ bool maybe_null = m_field_dec->maybe_null();
+ // This is_null value is bound to how the storage format stores its value
+ m_is_null = maybe_null && ((m_null_bytes[m_field_dec->m_null_offset] &
+ m_field_dec->m_null_mask) != 0);
+
+ // Skip the bytes we need to skip
+ int skip = m_field_iter->m_skip;
+ if (skip && !m_value_slice_reader->read(skip)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ m_field = m_table->field[m_field_dec->m_field_index];
+ // Decode each field
+ err = value_field_decoder::decode(m_buf, &m_offset, m_table, m_field,
+ m_field_dec, m_value_slice_reader, decode,
+ m_is_null);
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+ m_field_iter++;
+ // Only break for fields that are actually decoded, rather than skipped
+ if (decode) {
+ break;
+ }
+ }
+ return err;
+}
+
+// True when the iterator has consumed all READ_FIELD entries.
+template <typename value_field_decoder>
+bool Rdb_value_field_iterator<value_field_decoder>::end_of_fields() const {
+ return m_field_iter == m_field_end;
+}
+
+// Field object the iterator last positioned on; only valid after next().
+template <typename value_field_decoder>
+Field *Rdb_value_field_iterator<value_field_decoder>::get_field() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_field;
+}
+
+// Destination address in the record buffer for the current field.
+template <typename value_field_decoder>
+void *Rdb_value_field_iterator<value_field_decoder>::get_dst() const {
+ DBUG_ASSERT(m_buf != nullptr);
+ return m_buf + m_offset;
+}
+
+// Index of the current field within table->field[].
+template <typename value_field_decoder>
+int Rdb_value_field_iterator<value_field_decoder>::get_field_index() const {
+ DBUG_ASSERT(m_field_dec != nullptr);
+ return m_field_dec->m_field_index;
+}
+
+// MySQL type of the current field, as cached in its encoder.
+template <typename value_field_decoder>
+enum_field_types Rdb_value_field_iterator<value_field_decoder>::get_field_type()
+ const {
+ DBUG_ASSERT(m_field_dec != nullptr);
+ return m_field_dec->m_field_type;
+}
+
+// Whether the current field's value is SQL NULL per the null-bytes map.
+template <typename value_field_decoder>
+bool Rdb_value_field_iterator<value_field_decoder>::is_null() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_is_null;
+}
+
+/*
+ Initialize Rdb_converter with table data
+ @param thd IN Thread context
+ @param tbl_def IN MyRocks table definition
+ @param table IN Current open table
+*/
+Rdb_converter::Rdb_converter(const THD *thd, const Rdb_tbl_def *tbl_def,
+ TABLE *table)
+ : m_thd(thd), m_tbl_def(tbl_def), m_table(table) {
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(tbl_def != nullptr);
+ DBUG_ASSERT(table != nullptr);
+
+ m_key_requested = false;
+ m_verify_row_debug_checksums = false;
+ m_maybe_unpack_info = false;
+ m_row_checksums_checked = 0;
+ m_null_bytes = nullptr;
+ // NOTE(review): setup_field_encoders() returns early on malloc failure,
+ // leaving m_encoder_arr nullptr — confirm callers tolerate that state
+ setup_field_encoders();
+}
+
+// Release the encoder array and the reusable encode buffer.
+Rdb_converter::~Rdb_converter() {
+ my_free(m_encoder_arr);
+ m_encoder_arr = nullptr;
+ // These are needed to suppress valgrind errors in rocksdb.partition
+ m_storage_record.free();
+}
+
+/*
+ Decide storage type for each encoder.
+ Only called for fields that are part of the primary key: when the field can
+ be restored from its key image (possibly with unpack_info), it does not
+ need to be stored again in the value part.
+*/
+void Rdb_converter::get_storage_type(Rdb_field_encoder *const encoder,
+ const uint kp) {
+ auto pk_descr =
+ m_tbl_def->m_key_descr_arr[ha_rocksdb::pk_index(m_table, m_tbl_def)];
+ // STORE_SOME uses unpack_info.
+ if (pk_descr->has_unpack_info(kp)) {
+ DBUG_ASSERT(pk_descr->can_unpack(kp));
+ encoder->m_storage_type = Rdb_field_encoder::STORE_SOME;
+ m_maybe_unpack_info = true;
+ } else if (pk_descr->can_unpack(kp)) {
+ encoder->m_storage_type = Rdb_field_encoder::STORE_NONE;
+ }
+}
+
+/*
+ @brief
+ Setup which fields will be unpacked when reading rows
+
+ @detail
+ Three special cases when we still unpack all fields:
+ - When client requires decode_all_fields, such as this table is being
+ updated (m_lock_rows==RDB_LOCK_WRITE).
+ - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to
+ read all fields to find whether there is a row checksum at the end. We could
+ skip the fields instead of decoding them, but currently we do decoding.)
+ - On index merge as bitmap is cleared during that operation
+
+ @seealso
+ Rdb_converter::setup_field_encoders()
+ Rdb_converter::convert_record_from_storage_format()
+*/
+void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map,
+ bool decode_all_fields) {
+ m_key_requested = false;
+ m_decoders_vect.clear();
+ // One past the last decoder that actually decodes; used to trim trailing
+ // skip-only entries after the loop
+ int last_useful = 0;
+ // Fixed-width bytes accumulated to skip before the next emitted decoder
+ int skip_size = 0;
+
+ for (uint i = 0; i < m_table->s->fields; i++) {
+ // bitmap is cleared on index merge, but it still needs to decode columns
+ bool field_requested =
+ decode_all_fields || m_verify_row_debug_checksums ||
+ bitmap_is_clear_all(field_map) ||
+ bitmap_is_set(field_map, m_table->field[i]->field_index);
+
+ // We only need the decoder if the whole record is stored.
+ if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) {
+ // the field potentially needs unpacking
+ if (field_requested) {
+ // the field is in the read set
+ m_key_requested = true;
+ }
+ continue;
+ }
+
+ if (field_requested) {
+ // We will need to decode this field
+ m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size});
+ last_useful = m_decoders_vect.size();
+ skip_size = 0;
+ } else {
+ if (m_encoder_arr[i].uses_variable_len_encoding() ||
+ m_encoder_arr[i].maybe_null()) {
+ // For variable-length field, we need to read the data and skip it
+ m_decoders_vect.push_back({&m_encoder_arr[i], false, skip_size});
+ skip_size = 0;
+ } else {
+ // Fixed-width field can be skipped without looking at it.
+ // Add appropriate skip_size to the next field.
+ skip_size += m_encoder_arr[i].m_pack_length_in_rec;
+ }
+ }
+ }
+
+ // It could be that the last few elements are varchars that just do
+ // skipping. Remove them.
+ m_decoders_vect.erase(m_decoders_vect.begin() + last_useful,
+ m_decoders_vect.end());
+}
+
+/*
+ Build one Rdb_field_encoder per table field and compute how many bytes of
+ NULL flags the on-disk record format needs. NULL bits are packed eight per
+ byte in field order.
+*/
+void Rdb_converter::setup_field_encoders() {
+ uint null_bytes_length = 0;
+ uchar cur_null_mask = 0x1;
+
+ m_encoder_arr = static_cast<Rdb_field_encoder *>(
+ my_malloc(m_table->s->fields * sizeof(Rdb_field_encoder), MYF(0)));
+ if (m_encoder_arr == nullptr) {
+ // NOTE(review): allocation failure is silently ignored here — confirm
+ // callers check m_encoder_arr before use
+ return;
+ }
+
+ for (uint i = 0; i < m_table->s->fields; i++) {
+ Field *const field = m_table->field[i];
+ m_encoder_arr[i].m_storage_type = Rdb_field_encoder::STORE_ALL;
+
+ /*
+ Check if this field is
+ - a part of primary key, and
+ - it can be decoded back from its key image.
+ If both hold, we don't need to store this field in the value part of
+ RocksDB's key-value pair.
+
+ If hidden pk exists, we skip this check since the field will never be
+ part of the hidden pk.
+ */
+ if (!Rdb_key_def::table_has_hidden_pk(m_table)) {
+ KEY *const pk_info = &m_table->key_info[m_table->s->primary_key];
+ for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) {
+ // key_part->fieldnr is counted from 1
+ if (field->field_index + 1 == pk_info->key_part[kp].fieldnr) {
+ get_storage_type(&m_encoder_arr[i], kp);
+ break;
+ }
+ }
+ }
+
+ m_encoder_arr[i].m_field_type = field->real_type();
+ m_encoder_arr[i].m_field_index = i;
+ m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec();
+
+ if (field->real_maybe_null()) {
+ m_encoder_arr[i].m_null_mask = cur_null_mask;
+ m_encoder_arr[i].m_null_offset = null_bytes_length;
+ if (cur_null_mask == 0x80) {
+ // Current null byte is full; start a new one
+ cur_null_mask = 0x1;
+ null_bytes_length++;
+ } else {
+ cur_null_mask = cur_null_mask << 1;
+ }
+ } else {
+ // m_null_mask == 0 means the field is NOT NULL
+ m_encoder_arr[i].m_null_mask = 0;
+ }
+ }
+
+ // Count the last, unfinished NULL-bits byte
+ if (cur_null_mask != 0x1) {
+ null_bytes_length++;
+ }
+
+ m_null_bytes_length_in_record = null_bytes_length;
+}
+
+/*
+ EntryPoint for Decode:
+ Decode key slice(if requested) and value slice using built-in field
+ decoders
+ @param key_def IN key definition to decode
+ @param dst OUT Mysql buffer to fill decoded content
+ @param key_slice IN RocksDB key slice to decode
+ @param value_slice IN RocksDB value slice to decode
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::decode(const std::shared_ptr<Rdb_key_def> &key_def,
+ uchar *dst, // address to fill data
+ const rocksdb::Slice *key_slice,
+ const rocksdb::Slice *value_slice) {
+ // Currently only support decode primary key, Will add decode secondary later
+ DBUG_ASSERT(key_def->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY ||
+ key_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY);
+
+ const rocksdb::Slice *updated_key_slice = key_slice;
+#ifndef DBUG_OFF
+ // Debug builds copy the key so the myrocks_simulate_bad_pk_read1 injection
+ // point can corrupt it without touching the real slice
+ String last_rowkey;
+ last_rowkey.copy(key_slice->data(), key_slice->size(), &my_charset_bin);
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_read1",
+ { dbug_modify_key_varchar8(&last_rowkey); });
+ rocksdb::Slice rowkey_slice(last_rowkey.ptr(), last_rowkey.length());
+ updated_key_slice = &rowkey_slice;
+#endif
+ return convert_record_from_storage_format(key_def, updated_key_slice,
+ value_slice, dst);
+}
+
+/*
+ Decode value slice header: optional TTL timestamp, NULL-flag bytes, and
+ optional unpack info block.
+ @param reader IN value slice reader
+ @param pk_def IN key definition to decode
+ @param unpack_slice OUT unpack info slice
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::decode_value_header(
+ Rdb_string_reader *reader, const std::shared_ptr<Rdb_key_def> &pk_def,
+ rocksdb::Slice *unpack_slice) {
+ /* If it's a TTL record, skip the 8 byte TTL value */
+ if (pk_def->has_ttl()) {
+ const char *ttl_bytes;
+ if ((ttl_bytes = reader->read(ROCKSDB_SIZEOF_TTL_RECORD))) {
+ // Keep the TTL bytes so later updates can propagate them
+ memcpy(m_ttl_bytes, ttl_bytes, ROCKSDB_SIZEOF_TTL_RECORD);
+ } else {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
+ /* Other fields are decoded from the value */
+ if (m_null_bytes_length_in_record &&
+ !(m_null_bytes = reader->read(m_null_bytes_length_in_record))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ if (m_maybe_unpack_info) {
+ const char *unpack_info = reader->get_current_ptr();
+ if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) ||
+ !reader->read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+
+ uint16 unpack_info_len =
+ rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1));
+ *unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len);
+
+ // NOTE(review): the result of this read is not checked; a truncated
+ // unpack_info block would not be detected here — confirm downstream
+ // consumers handle that
+ reader->read(unpack_info_len -
+ Rdb_key_def::get_unpack_header_size(unpack_info[0]));
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Convert RocksDb key slice and value slice to Mysql format
+ @param key_def IN key definition to decode
+ @param key_slice IN RocksDB key slice
+ @param value_slice IN RocksDB value slice
+ @param dst OUT MySql format address
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::convert_record_from_storage_format(
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice *const key_slice,
+ const rocksdb::Slice *const value_slice, uchar *const dst) {
+ int err = HA_EXIT_SUCCESS;
+
+ // Header must be consumed first: it positions the reader at the field data
+ Rdb_string_reader value_slice_reader(value_slice);
+ rocksdb::Slice unpack_slice;
+ err = decode_value_header(&value_slice_reader, pk_def, &unpack_slice);
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+
+ /*
+ Decode PK fields from the key
+ */
+ if (m_key_requested) {
+ err = pk_def->unpack_record(m_table, dst, key_slice,
+ !unpack_slice.empty() ? &unpack_slice : nullptr,
+ false /* verify_checksum */);
+ }
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+
+ Rdb_value_field_iterator<Rdb_convert_to_record_value_decoder>
+ value_field_iterator(m_table, &value_slice_reader, this, dst);
+
+ // Decode value slices
+ while (!value_field_iterator.end_of_fields()) {
+ err = value_field_iterator.next();
+
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
+ }
+ }
+
+ if (m_verify_row_debug_checksums) {
+ // Optional trailing checksum chunk is validated only in debug mode
+ return verify_row_debug_checksum(pk_def, &value_slice_reader, key_slice,
+ value_slice);
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Verify checksum for row
+ @param pk_def IN key def
+ @param reader IN RocksDB value slice reader
+ @param key IN RocksDB key slice
+ @param value IN RocksDB value slice
+ @return
+ 0 OK
+ other HA_ERR error code (can be SE-specific)
+*/
+int Rdb_converter::verify_row_debug_checksum(
+ const std::shared_ptr<Rdb_key_def> &pk_def, Rdb_string_reader *reader,
+ const rocksdb::Slice *key, const rocksdb::Slice *value) {
+ // read(1) cannot return nullptr here: remaining_bytes() equals the full
+ // checksum chunk size, so at least one byte is available
+ if (reader->remaining_bytes() == RDB_CHECKSUM_CHUNK_SIZE &&
+ reader->read(1)[0] == RDB_CHECKSUM_DATA_TAG) {
+ uint32_t stored_key_chksum =
+ rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE));
+ uint32_t stored_val_chksum =
+ rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE));
+
+ // Value checksum covers everything except the trailing checksum chunk
+ const uint32_t computed_key_chksum =
+ my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size());
+ const uint32_t computed_val_chksum =
+ my_core::crc32(0, rdb_slice_to_uchar_ptr(value),
+ value->size() - RDB_CHECKSUM_CHUNK_SIZE);
+
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", stored_key_chksum++;);
+
+ if (stored_key_chksum != computed_key_chksum) {
+ pk_def->report_checksum_mismatch(true, key->data(), key->size());
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
+ }
+
+ DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2", stored_val_chksum++;);
+ if (stored_val_chksum != computed_val_chksum) {
+ pk_def->report_checksum_mismatch(false, value->data(), value->size());
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
+ }
+
+ m_row_checksums_checked++;
+ }
+ // Any leftover bytes at this point mean the value slice is malformed
+ if (reader->remaining_bytes()) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/**
+ Convert record from table->record[0] form into a form that can be written
+ into rocksdb.
+
+ @param pk_def IN Current key def
+ @param pk_unpack_info IN Unpack info generated during key pack
+ @param is_update_row IN Whether it is update row
+ @param store_row_debug_checksums IN Whether to store checksums
+ @param ttl_bytes IN/OUT Old ttl value from previous record and
+ ttl value during current encode
+ @param is_ttl_bytes_updated OUT Whether ttl bytes is updated
+ @param value_slice OUT Data slice with record data.
+*/
+int Rdb_converter::encode_value_slice(
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice &pk_packed_slice, Rdb_string_writer *pk_unpack_info,
+ bool is_update_row, bool store_row_debug_checksums, char *ttl_bytes,
+ bool *is_ttl_bytes_updated, rocksdb::Slice *const value_slice) {
+ DBUG_ASSERT(pk_def != nullptr);
+ // Currently only primary key will store value slice
+ DBUG_ASSERT(pk_def->m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY ||
+ pk_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY);
+ DBUG_ASSERT_IMP(m_maybe_unpack_info, pk_unpack_info);
+
+ bool has_ttl = pk_def->has_ttl();
+ bool has_ttl_column = !pk_def->m_ttl_column.empty();
+
+ // m_storage_record is reused across calls; reset length but keep capacity
+ m_storage_record.length(0);
+
+ if (has_ttl) {
+ /* If it's a TTL record, reserve space for 8 byte TTL value in front. */
+ m_storage_record.fill(
+ ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_length_in_record, 0);
+ // NOTE: is_ttl_bytes_updated is only used for update case
+ // During update, skip update sk key/values slice iff none of sk fields
+ // have changed and ttl bytes isn't changed. see
+ // ha_rocksdb::update_write_sk() for more info
+ *is_ttl_bytes_updated = false;
+ char *const data = const_cast<char *>(m_storage_record.ptr());
+ if (has_ttl_column) {
+ // TTL value comes from an explicit BIGINT column in the table
+ DBUG_ASSERT(pk_def->get_ttl_field_index() != UINT_MAX);
+ Field *const field = m_table->field[pk_def->get_ttl_field_index()];
+ DBUG_ASSERT(field->pack_length_in_rec() == ROCKSDB_SIZEOF_TTL_RECORD);
+ DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
+
+ uint64 ts = uint8korr(field->ptr);
+#ifndef DBUG_OFF
+ ts += rdb_dbug_set_ttl_rec_ts();
+#endif
+ rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+ if (is_update_row) {
+ *is_ttl_bytes_updated =
+ memcmp(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ // Also store in m_ttl_bytes to propagate to update_write_sk
+ memcpy(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ } else {
+ /*
+ For implicitly generated TTL records we need to copy over the old
+ TTL value from the old record in the event of an update. It was stored
+ in m_ttl_bytes.
+
+ Otherwise, generate a timestamp using the current time.
+ */
+ if (is_update_row) {
+ memcpy(data, ttl_bytes, sizeof(uint64));
+ } else {
+ uint64 ts = static_cast<uint64>(std::time(nullptr));
+#ifndef DBUG_OFF
+ ts += rdb_dbug_set_ttl_rec_ts();
+#endif
+ rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+ // Also store in m_ttl_bytes to propagate to update_write_sk
+ memcpy(ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ }
+ } else {
+ /* All NULL bits are initially 0 */
+ m_storage_record.fill(m_null_bytes_length_in_record, 0);
+ }
+
+ // If a primary key may have non-empty unpack_info for certain values,
+ // (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block
+ // itself was prepared in Rdb_key_def::pack_record.
+ if (m_maybe_unpack_info) {
+ m_storage_record.append(reinterpret_cast<char *>(pk_unpack_info->ptr()),
+ pk_unpack_info->get_current_pos());
+ }
+ for (uint i = 0; i < m_table->s->fields; i++) {
+ Rdb_field_encoder &encoder = m_encoder_arr[i];
+ /* Don't pack decodable PK key parts */
+ if (encoder.m_storage_type != Rdb_field_encoder::STORE_ALL) {
+ continue;
+ }
+
+ Field *const field = m_table->field[i];
+ if (encoder.maybe_null()) {
+ // Null bytes sit right after the (optional) TTL prefix
+ char *data = const_cast<char *>(m_storage_record.ptr());
+ if (has_ttl) {
+ data += ROCKSDB_SIZEOF_TTL_RECORD;
+ }
+
+ if (field->is_null()) {
+ data[encoder.m_null_offset] |= encoder.m_null_mask;
+ /* Don't write anything for NULL values */
+ continue;
+ }
+ }
+
+ if (encoder.m_field_type == MYSQL_TYPE_BLOB) {
+ my_core::Field_blob *blob =
+ reinterpret_cast<my_core::Field_blob *>(field);
+ /* Get the number of bytes needed to store length*/
+ const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr;
+
+ /* Store the length of the value */
+ m_storage_record.append(reinterpret_cast<char *>(blob->ptr),
+ length_bytes);
+
+ /* Store the blob value itself */
+ char *data_ptr;
+ memcpy(&data_ptr, blob->ptr + length_bytes, sizeof(uchar **));
+ m_storage_record.append(data_ptr, blob->get_length());
+ } else if (encoder.m_field_type == MYSQL_TYPE_VARCHAR) {
+ Field_varstring *const field_var =
+ reinterpret_cast<Field_varstring *>(field);
+ uint data_len;
+ /* field_var->length_bytes is 1 or 2 */
+ if (field_var->length_bytes == 1) {
+ data_len = field_var->ptr[0];
+ } else {
+ DBUG_ASSERT(field_var->length_bytes == 2);
+ data_len = uint2korr(field_var->ptr);
+ }
+ // Length prefix and payload are stored back-to-back, as in record[0]
+ m_storage_record.append(reinterpret_cast<char *>(field_var->ptr),
+ field_var->length_bytes + data_len);
+ } else {
+ /* Copy the field data */
+ const uint len = field->pack_length_in_rec();
+ m_storage_record.append(reinterpret_cast<char *>(field->ptr), len);
+ }
+ }
+
+ if (store_row_debug_checksums) {
+ // Append tag byte + key crc32 + value crc32 (debug verification chunk)
+ const uint32_t key_crc32 = my_core::crc32(
+ 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size());
+ const uint32_t val_crc32 =
+ my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record),
+ m_storage_record.length());
+ uchar key_crc_buf[RDB_CHECKSUM_SIZE];
+ uchar val_crc_buf[RDB_CHECKSUM_SIZE];
+ rdb_netbuf_store_uint32(key_crc_buf, key_crc32);
+ rdb_netbuf_store_uint32(val_crc_buf, val_crc32);
+ m_storage_record.append((const char *)&RDB_CHECKSUM_DATA_TAG, 1);
+ m_storage_record.append((const char *)key_crc_buf, RDB_CHECKSUM_SIZE);
+ m_storage_record.append((const char *)val_crc_buf, RDB_CHECKSUM_SIZE);
+ }
+
+ // The returned slice aliases m_storage_record; it is valid only until the
+ // next encode_value_slice() call on this converter
+ *value_slice =
+ rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length());
+
+ return HA_EXIT_SUCCESS;
+}
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_converter.h b/storage/rocksdb/rdb_converter.h
new file mode 100644
index 00000000000..a4eae341f16
--- /dev/null
+++ b/storage/rocksdb/rdb_converter.h
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2018, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#pragma once
+
+// C++ standard header files
+#include <string>
+#include <vector>
+
+// MySQL header files
+#include "./handler.h" // handler
+#include "./my_global.h" // ulonglong
+#include "./sql_string.h"
+#include "./ut0counter.h"
+
+// MyRocks header files
+#include "./ha_rocksdb.h"
+#include "./rdb_datadic.h"
+
+namespace myrocks {
+class Rdb_field_encoder;
+
+/**
+ Describes instructions on how to decode one field from the value slice.
+ A vector of these (m_decoders_vect) is prepared once per query by
+ Rdb_converter::setup_field_decoders().
+*/
+struct READ_FIELD {
+ // Points to Rdb_field_encoder describing the field
+ Rdb_field_encoder *m_field_enc;
+ // if true, decode the field, otherwise skip it
+ bool m_decode;
+ // Skip this many bytes before reading (or skipping) this field
+ int m_skip;
+};
+
+/**
+ Class to convert rocksdb value slice from storage format to mysql record
+ format. Stateless: used only through its static decode() entry point as the
+ policy parameter of Rdb_value_field_iterator.
+*/
+class Rdb_convert_to_record_value_decoder {
+ public:
+ Rdb_convert_to_record_value_decoder() = delete;
+ Rdb_convert_to_record_value_decoder(
+ const Rdb_convert_to_record_value_decoder &decoder) = delete;
+ Rdb_convert_to_record_value_decoder &operator=(
+ const Rdb_convert_to_record_value_decoder &decoder) = delete;
+
+ // Decode (or skip, when decode==false) one field from 'reader' into 'buf'
+ static int decode(uchar *const buf, uint *offset, TABLE *table,
+ my_core::Field *field, Rdb_field_encoder *field_dec,
+ Rdb_string_reader *reader, bool decode, bool is_null);
+
+ private:
+ static int decode_blob(TABLE *table, Field *field, Rdb_string_reader *reader,
+ bool decode);
+ static int decode_fixed_length_field(Field *const field,
+ Rdb_field_encoder *field_dec,
+ Rdb_string_reader *const reader,
+ bool decode);
+
+ static int decode_varchar(Field *const field, Rdb_string_reader *const reader,
+ bool decode);
+};
+
+/**
+ Class to iterate over fields in a RocksDB value slice.
+ A template class instantiation represents a way to decode the data.
+ The reason to use a template class instead of a normal class is to eliminate
+ virtual method calls.
+*/
+template <typename value_field_decoder>
+class Rdb_value_field_iterator {
+ private:
+ bool m_is_null;
+ std::vector<READ_FIELD>::const_iterator m_field_iter;
+ std::vector<READ_FIELD>::const_iterator m_field_end;
+ Rdb_string_reader *m_value_slice_reader;
+ // null value map
+ const char *m_null_bytes;
+ // The current open table
+ TABLE *m_table;
+ // The current field
+ Field *m_field;
+ Rdb_field_encoder *m_field_dec;
+ uchar *const m_buf;
+ uint m_offset;
+
+ public:
+ Rdb_value_field_iterator(TABLE *table, Rdb_string_reader *value_slice_reader,
+ const Rdb_converter *rdb_converter,
+ uchar *const buf);
+ Rdb_value_field_iterator(const Rdb_value_field_iterator &field_iterator) =
+ delete;
+ Rdb_value_field_iterator &operator=(
+ const Rdb_value_field_iterator &field_iterator) = delete;
+
+ /*
+ Move and decode next field
+ Run next() before accessing data
+ */
+ int next();
+ // Whether current field is the end of fields
+ bool end_of_fields() const;
+ void *get_dst() const;
+ // Whether the value of current field is null
+ bool is_null() const;
+ // get current field index
+ int get_field_index() const;
+ // get current field type
+ enum_field_types get_field_type() const;
+ // get current field
+ Field *get_field() const;
+};
+
+/**
+ Class to convert Mysql formats to rocksdb storage format, and vice versa.
+*/
+class Rdb_converter {
+ public:
+ /*
+ Initialize converter with table data
+ */
+ Rdb_converter(const THD *thd, const Rdb_tbl_def *tbl_def, TABLE *table);
+ Rdb_converter(const Rdb_converter &decoder) = delete;
+ Rdb_converter &operator=(const Rdb_converter &decoder) = delete;
+ ~Rdb_converter();
+
+ // Choose which fields will be decoded when reading rows; see the .cc for
+ // the cases where all fields are decoded regardless of the bitmap
+ void setup_field_decoders(const MY_BITMAP *field_map,
+ bool decode_all_fields = false);
+
+ // Decode a RocksDB key/value pair into MySQL record format (PK only)
+ int decode(const std::shared_ptr<Rdb_key_def> &key_def, uchar *dst,
+ const rocksdb::Slice *key_slice,
+ const rocksdb::Slice *value_slice);
+
+ // Build the storage-format value slice for the row in table->record[0]
+ int encode_value_slice(const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice &pk_packed_slice,
+ Rdb_string_writer *pk_unpack_info, bool is_update_row,
+ bool store_row_debug_checksums, char *ttl_bytes,
+ bool *is_ttl_bytes_updated,
+ rocksdb::Slice *const value_slice);
+
+ my_core::ha_rows get_row_checksums_checked() const {
+ return m_row_checksums_checked;
+ }
+ bool get_verify_row_debug_checksums() const {
+ return m_verify_row_debug_checksums;
+ }
+ void set_verify_row_debug_checksums(bool verify_row_debug_checksums) {
+ m_verify_row_debug_checksums = verify_row_debug_checksums;
+ }
+
+ const Rdb_field_encoder *get_encoder_arr() const { return m_encoder_arr; }
+ int get_null_bytes_in_record() { return m_null_bytes_length_in_record; }
+ const char *get_null_bytes() const { return m_null_bytes; }
+ void set_is_key_requested(bool key_requested) {
+ m_key_requested = key_requested;
+ }
+ bool get_maybe_unpack_info() const { return m_maybe_unpack_info; }
+
+ char *get_ttl_bytes_buffer() { return m_ttl_bytes; }
+
+ const std::vector<READ_FIELD> *get_decode_fields() const {
+ return &m_decoders_vect;
+ }
+
+ private:
+ int decode_value_header(Rdb_string_reader *reader,
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ rocksdb::Slice *unpack_slice);
+
+ void setup_field_encoders();
+
+ void get_storage_type(Rdb_field_encoder *const encoder, const uint kp);
+
+ int convert_record_from_storage_format(
+ const std::shared_ptr<Rdb_key_def> &pk_def,
+ const rocksdb::Slice *const key, const rocksdb::Slice *const value,
+ uchar *const buf);
+
+ int verify_row_debug_checksum(const std::shared_ptr<Rdb_key_def> &pk_def,
+ Rdb_string_reader *reader,
+ const rocksdb::Slice *key,
+ const rocksdb::Slice *value);
+
+ private:
+ /*
+ This tells if any field which is part of the key needs to be unpacked and
+ decoded.
+ */
+ bool m_key_requested;
+ /*
+ Controls whether verifying checksums during reading, This is updated from
+ the session variable at the start of each query.
+ */
+ bool m_verify_row_debug_checksums;
+ // Thread handle
+ const THD *m_thd;
+ /* MyRocks table definition*/
+ const Rdb_tbl_def *m_tbl_def;
+ /* The current open table */
+ TABLE *m_table;
+ /*
+ Number of bytes in on-disk (storage) record format that are used for
+ storing SQL NULL flags.
+ */
+ int m_null_bytes_length_in_record;
+ /*
+ Pointer to null bytes value
+ */
+ const char *m_null_bytes;
+ /*
+ TRUE <=> Some fields in the PK may require unpack_info.
+ */
+ bool m_maybe_unpack_info;
+ /*
+ Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
+ */
+ char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD];
+ /*
+ Array of table->s->fields elements telling how to store fields in the
+ record.
+ */
+ Rdb_field_encoder *m_encoder_arr;
+ /*
+ Array of request fields telling how to decode data in RocksDB format
+ */
+ std::vector<READ_FIELD> m_decoders_vect;
+ /*
+ A counter of how many row checksums were checked for this table. Note that
+ this does not include checksums for secondary index entries.
+ */
+ my_core::ha_rows m_row_checksums_checked;
+ // buffer to hold data during encode_value_slice
+ String m_storage_record;
+};
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index 8e286547940..fee5d24eb66 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -15,7 +15,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
+#pragma implementation // gcc: Class implementation
#endif
/* For use of 'PRIu64': */
@@ -45,9 +45,11 @@
#include "./sql_table.h"
/* MyRocks header files */
+#include "./ha_rocksdb.h"
#include "./ha_rocksdb_proto.h"
#include "./my_stacktrace.h"
#include "./rdb_cf_manager.h"
+#include "./rdb_psi.h"
#include "./rdb_utils.h"
namespace myrocks {
@@ -68,9 +70,229 @@ inline bool field_check_field_name_match(Field *field, const char *name)
/*
- Rdb_key_def class implementation
+ Decode current key field
+ @param fpi IN data structure contains field metadata
+ @param field IN current field
+ @param reader IN key slice reader
+ @param unp_reader IN unpack information reader
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
+*/
+// Decode one key field: consume the NULL marker byte (if the field is
+// nullable), then unpack the value via the field's unpack function.
+// Returns HA_EXIT_SUCCESS / HA_EXIT_FAILURE (caller maps nonzero results to
+// HA_ERR_ROCKSDB_CORRUPT_DATA).
+int Rdb_convert_to_record_key_decoder::decode_field(
+ Rdb_field_packing *fpi, Field *field, Rdb_string_reader *reader,
+ const uchar *const default_value, Rdb_string_reader *unpack_reader) {
+ if (fpi->m_maybe_null) {
+ const char *nullp;
+ if (!(nullp = reader->read(1))) {
+ return HA_EXIT_FAILURE;
+ }
+
+ if (*nullp == 0) {
+ /* Set the NULL-bit of this field */
+ field->set_null();
+ /* Also set the field to its default value */
+ memcpy(field->ptr, default_value, field->pack_length());
+ return HA_EXIT_SUCCESS;
+ } else if (*nullp == 1) {
+ field->set_notnull();
+ } else {
+ // Any marker other than 0/1 means the key image is corrupt
+ return HA_EXIT_FAILURE;
+ }
+ }
+
+ return (fpi->m_unpack_func)(fpi, field, field->ptr, reader, unpack_reader);
+}
+
+/*
+ Decode current key field
+
+ @param buf OUT the buf starting address
+ @param offset OUT the bytes offset when data is written
+ @param fpi IN data structure contains field metadata
+ @param table IN current table
+ @param field IN current field
+ @param has_unpack_inf IN whether contains unpack inf
+ @param reader IN key slice reader
+ @param unp_reader IN unpack information reader
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
*/
+// Decode one key field into 'buf' (which may differ from table->record[0]):
+// the field is temporarily repointed at 'buf', unpacked, then restored.
+// Writes the field's offset within the record into *offset.
+int Rdb_convert_to_record_key_decoder::decode(
+ uchar *const buf, uint *offset, Rdb_field_packing *fpi, TABLE *table,
+ Field *field, bool has_unpack_info, Rdb_string_reader *reader,
+ Rdb_string_reader *unpack_reader) {
+ DBUG_ASSERT(buf != nullptr);
+ DBUG_ASSERT(offset != nullptr);
+
+ uint field_offset = field->ptr - table->record[0];
+ *offset = field_offset;
+ uint null_offset = field->null_offset();
+ bool maybe_null = field->real_maybe_null();
+
+ // Point the field into the destination buffer for the duration of decode
+ field->move_field(buf + field_offset,
+ maybe_null ? buf + null_offset : nullptr, field->null_bit);
+
+ // If we need unpack info, but there is none, tell the unpack function
+ // this by passing unp_reader as nullptr. If we never read unpack_info
+ // during unpacking anyway, then there won't an error.
+ bool maybe_missing_unpack = !has_unpack_info && fpi->uses_unpack_info();
+
+ int res =
+ decode_field(fpi, field, reader, table->s->default_values + field_offset,
+ maybe_missing_unpack ? nullptr : unpack_reader);
+
+ // Restore field->ptr and field->null_ptr
+ field->move_field(table->record[0] + field_offset,
+ maybe_null ? table->record[0] + null_offset : nullptr,
+ field->null_bit);
+ if (res != UNPACK_SUCCESS) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Skip current key field
+ @param fpi IN data structure contains field metadata
+ @param field IN current field
+ @param reader IN key slice reader
+ @param unp_reader IN unpack information reader
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
+*/
+int Rdb_convert_to_record_key_decoder::skip(const Rdb_field_packing *fpi,
+ const Field *field,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unp_reader) {
+ /* It is impossible to unpack the column. Skip it. */
+ if (fpi->m_maybe_null) {
+ const char *nullp;
+ if (!(nullp = reader->read(1))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ if (*nullp == 0) {
+ /* This is a NULL value */
+ return HA_EXIT_SUCCESS;
+ }
+ /* If NULL marker is not '0', it can be only '1' */
+ if (*nullp != 1) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+ if ((fpi->m_skip_func)(fpi, field, reader)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ // If this is a space padded varchar, we need to skip the indicator
+ // bytes for trailing bytes. They're useless since we can't restore the
+ // field anyway.
+ //
+ // There is a special case for prefixed varchars where we do not
+ // generate unpack info, because we know prefixed varchars cannot be
+ // unpacked. In this case, it is not necessary to skip.
+ if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad &&
+ !fpi->m_unpack_info_stores_value) {
+ unp_reader->read(fpi->m_unpack_info_uses_two_bytes ? 2 : 1);
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+// Set up iteration over all key parts of 'key_def'. Decoded data is written
+// into 'buf'; 'covered_bitmap' (may be nullptr) marks which varchar columns
+// are fully covered by a secondary key.
+// NOTE(review): m_is_null is not initialized here and is not assigned in the
+// visible next() — confirm it is set elsewhere before get_is_null() is used.
+Rdb_key_field_iterator::Rdb_key_field_iterator(
+ const Rdb_key_def *key_def, Rdb_field_packing *pack_info,
+ Rdb_string_reader *reader, Rdb_string_reader *unp_reader, TABLE *table,
+ bool has_unpack_info, const MY_BITMAP *covered_bitmap, uchar *const buf) {
+ m_key_def = key_def;
+ m_pack_info = pack_info;
+ m_iter_index = 0;
+ m_iter_end = key_def->get_key_parts();
+ m_reader = reader;
+ m_unp_reader = unp_reader;
+ m_table = table;
+ m_has_unpack_info = has_unpack_info;
+ m_covered_bitmap = covered_bitmap;
+ m_buf = buf;
+ m_secondary_key =
+ (key_def->m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY);
+ m_hidden_pk_exists = Rdb_key_def::table_has_hidden_pk(table);
+ m_is_hidden_pk =
+ (key_def->m_index_type == Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY);
+ m_curr_bitmap_pos = 0;
+ m_offset = 0;
+}
+
+// Destination address in the output buffer for the current field.
+void *Rdb_key_field_iterator::get_dst() const { return m_buf + m_offset; }
+
+// Index of the current field within the table (valid after next()).
+int Rdb_key_field_iterator::get_field_index() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_field->field_index;
+}
+
+// Whether the current field decoded to SQL NULL.
+bool Rdb_key_field_iterator::get_is_null() const { return m_is_null; }
+// Field object for the current key part (valid after next()).
+Field *Rdb_key_field_iterator::get_field() const {
+ DBUG_ASSERT(m_field != nullptr);
+ return m_field;
+}
+
+// True while there are key parts left to process.
+bool Rdb_key_field_iterator::has_next() { return m_iter_index < m_iter_end; }
+
+/**
+ Iterate each field in the key and decode/skip one by one.
+ Each call advances until one field has been decoded (or the hidden-pk tail
+ has been skipped); skipped-only fields do not pause the loop.
+*/
+int Rdb_key_field_iterator::next() {
+ int status = HA_EXIT_SUCCESS;
+ while (m_iter_index < m_iter_end) {
+ int curr_index = m_iter_index++;
+
+ m_fpi = &m_pack_info[curr_index];
+ /*
+ Hidden pk field is packed at the end of the secondary keys, but the SQL
+ layer does not know about it. Skip retrieving field if hidden pk.
+ */
+ if ((m_secondary_key && m_hidden_pk_exists &&
+ curr_index + 1 == m_iter_end) ||
+ m_is_hidden_pk) {
+ DBUG_ASSERT(m_fpi->m_unpack_func);
+ if ((m_fpi->m_skip_func)(m_fpi, nullptr, m_reader)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ return HA_EXIT_SUCCESS;
+ }
+
+ m_field = m_fpi->get_field_in_table(m_table);
+
+ // A varchar in a secondary key may be only prefix-covered; consult the
+ // covered bitmap (one bit per such column, capped at MAX_REF_PARTS)
+ bool covered_column = true;
+ if (m_covered_bitmap != nullptr &&
+ m_field->real_type() == MYSQL_TYPE_VARCHAR && !m_fpi->m_covered) {
+ covered_column = m_curr_bitmap_pos < MAX_REF_PARTS &&
+ bitmap_is_set(m_covered_bitmap, m_curr_bitmap_pos++);
+ }
+
+ if (m_fpi->m_unpack_func && covered_column) {
+ /* It is possible to unpack this column. Do it. */
+ status = Rdb_convert_to_record_key_decoder::decode(
+ m_buf, &m_offset, m_fpi, m_table, m_field, m_has_unpack_info,
+ m_reader, m_unp_reader);
+ if (status) {
+ return status;
+ }
+ break;
+ } else {
+ status = Rdb_convert_to_record_key_decoder::skip(m_fpi, m_field, m_reader,
+ m_unp_reader);
+ if (status) {
+ return status;
+ }
+ }
+ }
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Rdb_key_def class implementation
+*/
Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg,
rocksdb::ColumnFamilyHandle *cf_handle_arg,
uint16_t index_dict_version_arg, uchar index_type_arg,
@@ -78,16 +300,26 @@ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg,
bool is_per_partition_cf_arg, const char *_name,
Rdb_index_stats _stats, uint32 index_flags_bitmap,
uint32 ttl_rec_offset, uint64 ttl_duration)
- : m_index_number(indexnr_arg), m_cf_handle(cf_handle_arg),
+ : m_index_number(indexnr_arg),
+ m_cf_handle(cf_handle_arg),
m_index_dict_version(index_dict_version_arg),
- m_index_type(index_type_arg), m_kv_format_version(kv_format_version_arg),
+ m_index_type(index_type_arg),
+ m_kv_format_version(kv_format_version_arg),
m_is_reverse_cf(is_reverse_cf_arg),
- m_is_per_partition_cf(is_per_partition_cf_arg), m_name(_name),
- m_stats(_stats), m_index_flags_bitmap(index_flags_bitmap),
- m_ttl_rec_offset(ttl_rec_offset), m_ttl_duration(ttl_duration),
- m_ttl_column(""), m_pk_part_no(nullptr), m_pack_info(nullptr),
- m_keyno(keyno_arg), m_key_parts(0), m_ttl_pk_key_part_offset(UINT_MAX),
- m_ttl_field_offset(UINT_MAX), m_prefix_extractor(nullptr),
+ m_is_per_partition_cf(is_per_partition_cf_arg),
+ m_name(_name),
+ m_stats(_stats),
+ m_index_flags_bitmap(index_flags_bitmap),
+ m_ttl_rec_offset(ttl_rec_offset),
+ m_ttl_duration(ttl_duration),
+ m_ttl_column(""),
+ m_pk_part_no(nullptr),
+ m_pack_info(nullptr),
+ m_keyno(keyno_arg),
+ m_key_parts(0),
+ m_ttl_pk_key_part_offset(UINT_MAX),
+ m_ttl_field_index(UINT_MAX),
+ m_prefix_extractor(nullptr),
m_maxlength(0) // means 'not intialized'
{
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
@@ -104,16 +336,23 @@ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg,
}
Rdb_key_def::Rdb_key_def(const Rdb_key_def &k)
- : m_index_number(k.m_index_number), m_cf_handle(k.m_cf_handle),
+ : m_index_number(k.m_index_number),
+ m_cf_handle(k.m_cf_handle),
m_is_reverse_cf(k.m_is_reverse_cf),
- m_is_per_partition_cf(k.m_is_per_partition_cf), m_name(k.m_name),
- m_stats(k.m_stats), m_index_flags_bitmap(k.m_index_flags_bitmap),
- m_ttl_rec_offset(k.m_ttl_rec_offset), m_ttl_duration(k.m_ttl_duration),
- m_ttl_column(k.m_ttl_column), m_pk_part_no(k.m_pk_part_no),
- m_pack_info(k.m_pack_info), m_keyno(k.m_keyno),
+ m_is_per_partition_cf(k.m_is_per_partition_cf),
+ m_name(k.m_name),
+ m_stats(k.m_stats),
+ m_index_flags_bitmap(k.m_index_flags_bitmap),
+ m_ttl_rec_offset(k.m_ttl_rec_offset),
+ m_ttl_duration(k.m_ttl_duration),
+ m_ttl_column(k.m_ttl_column),
+ m_pk_part_no(k.m_pk_part_no),
+ m_pack_info(k.m_pack_info),
+ m_keyno(k.m_keyno),
m_key_parts(k.m_key_parts),
m_ttl_pk_key_part_offset(k.m_ttl_pk_key_part_offset),
- m_ttl_field_offset(UINT_MAX), m_prefix_extractor(k.m_prefix_extractor),
+ m_ttl_field_index(UINT_MAX),
+ m_prefix_extractor(k.m_prefix_extractor),
m_maxlength(k.m_maxlength) {
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
@@ -172,16 +411,15 @@ void Rdb_key_def::setup(const TABLE *const tbl,
KEY *pk_info = nullptr;
if (!is_hidden_pk) {
key_info = &tbl->key_info[m_keyno];
- if (!hidden_pk_exists)
- pk_info = &tbl->key_info[tbl->s->primary_key];
+ if (!hidden_pk_exists) pk_info = &tbl->key_info[tbl->s->primary_key];
m_name = std::string(key_info->name);
} else {
m_name = HIDDEN_PK_NAME;
}
- if (secondary_key)
+ if (secondary_key) {
m_pk_key_parts= hidden_pk_exists ? 1 : pk_info->ext_key_parts;
- else {
+ } else {
pk_info = nullptr;
m_pk_key_parts = 0;
}
@@ -207,11 +445,12 @@ void Rdb_key_def::setup(const TABLE *const tbl,
m_key_parts += m_pk_key_parts;
}
- if (secondary_key)
+ if (secondary_key) {
m_pk_part_no = reinterpret_cast<uint *>(
my_malloc(sizeof(uint) * m_key_parts, MYF(0)));
- else
+ } else {
m_pk_part_no = nullptr;
+ }
const size_t size = sizeof(Rdb_field_packing) * m_key_parts;
m_pack_info =
@@ -222,7 +461,7 @@ void Rdb_key_def::setup(const TABLE *const tbl,
table creation.
*/
Rdb_key_def::extract_ttl_col(tbl, tbl_def, &m_ttl_column,
- &m_ttl_field_offset, true);
+ &m_ttl_field_index, true);
size_t max_len = INDEX_NUMBER_SIZE;
int unpack_len = 0;
@@ -266,8 +505,7 @@ void Rdb_key_def::setup(const TABLE *const tbl,
}
}
- if (field && field->real_maybe_null())
- max_len += 1; // NULL-byte
+ if (field && field->real_maybe_null()) max_len += 1; // NULL-byte
m_pack_info[dst_i].setup(this, field, keyno_to_set, keypart_to_set,
key_part ? key_part->length : 0);
@@ -287,8 +525,7 @@ void Rdb_key_def::setup(const TABLE *const tbl,
appended to the end of the sk.
*/
m_pk_part_no[dst_i] = -1;
- if (simulating_extkey)
- m_pk_part_no[dst_i] = 0;
+ if (simulating_extkey) m_pk_part_no[dst_i] = 0;
}
max_len += m_pack_info[dst_i].m_max_image_len;
@@ -405,7 +642,7 @@ uint Rdb_key_def::extract_ttl_duration(const TABLE *const table_arg,
uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
const Rdb_tbl_def *const tbl_def_arg,
std::string *ttl_column,
- uint *ttl_field_offset, bool skip_checks) {
+ uint *ttl_field_index, bool skip_checks) {
std::string table_comment(table_arg->s->comment.str,
table_arg->s->comment.length);
/*
@@ -423,7 +660,7 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
Field *const field = table_arg->field[i];
if (field_check_field_name_match(field, ttl_col_str.c_str())) {
*ttl_column = ttl_col_str;
- *ttl_field_offset = i;
+ *ttl_field_index = i;
}
}
return HA_EXIT_SUCCESS;
@@ -439,7 +676,7 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
field->key_type() == HA_KEYTYPE_ULONGLONG &&
!field->real_maybe_null()) {
*ttl_column = ttl_col_str;
- *ttl_field_offset = i;
+ *ttl_field_index = i;
found = true;
break;
}
@@ -454,9 +691,8 @@ uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
return HA_EXIT_SUCCESS;
}
-const std::string
-Rdb_key_def::gen_qualifier_for_table(const char *const qualifier,
- const std::string &partition_name) {
+const std::string Rdb_key_def::gen_qualifier_for_table(
+ const char *const qualifier, const std::string &partition_name) {
bool has_partition = !partition_name.empty();
std::string qualifier_str = "";
@@ -484,8 +720,8 @@ Rdb_key_def::gen_qualifier_for_table(const char *const qualifier,
Formats the string and returns the column family name assignment part for a
specific partition.
*/
-const std::string
-Rdb_key_def::gen_cf_name_qualifier_for_partition(const std::string &prefix) {
+const std::string Rdb_key_def::gen_cf_name_qualifier_for_partition(
+ const std::string &prefix) {
DBUG_ASSERT(!prefix.empty());
return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_CF_NAME_QUALIFIER +
@@ -500,8 +736,8 @@ const std::string Rdb_key_def::gen_ttl_duration_qualifier_for_partition(
RDB_TTL_DURATION_QUALIFIER + RDB_QUALIFIER_VALUE_SEP;
}
-const std::string
-Rdb_key_def::gen_ttl_col_qualifier_for_partition(const std::string &prefix) {
+const std::string Rdb_key_def::gen_ttl_col_qualifier_for_partition(
+ const std::string &prefix) {
DBUG_ASSERT(!prefix.empty());
return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_TTL_COL_QUALIFIER +
@@ -604,15 +840,13 @@ int Rdb_key_def::read_memcmp_key_part(const TABLE *table_arg,
/* It is impossible to unpack the column. Skip it. */
if (m_pack_info[part_num].m_maybe_null) {
const char *nullp;
- if (!(nullp = reader->read(1)))
- return 1;
+ if (!(nullp = reader->read(1))) return 1;
if (*nullp == 0) {
/* This is a NULL value */
return -1;
} else {
/* If NULL marker is not '0', it can be only '1' */
- if (*nullp != 1)
- return 1;
+ if (*nullp != 1) return 1;
}
}
@@ -622,11 +856,12 @@ int Rdb_key_def::read_memcmp_key_part(const TABLE *table_arg,
bool is_hidden_pk_part = (part_num + 1 == m_key_parts) &&
(table_arg->s->primary_key == MAX_INDEXES);
Field *field = nullptr;
- if (!is_hidden_pk_part)
+ if (!is_hidden_pk_part) {
field = fpi->get_field_in_table(table_arg);
- if ((this->*fpi->m_skip_func)(fpi, field, reader))
+ }
+ if ((fpi->m_skip_func)(fpi, field, reader)) {
return 1;
-
+ }
return 0;
}
@@ -661,6 +896,7 @@ uint Rdb_key_def::get_primary_key_tuple(const TABLE *const table,
uchar *const pk_buffer) const {
DBUG_ASSERT(table != nullptr);
DBUG_ASSERT(key != nullptr);
+ DBUG_ASSERT(m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY);
DBUG_ASSERT(pk_buffer);
uint size = 0;
@@ -679,8 +915,7 @@ uint Rdb_key_def::get_primary_key_tuple(const TABLE *const table,
Rdb_string_reader reader(key);
// Skip the index number
- if ((!reader.read(INDEX_NUMBER_SIZE)))
- return RDB_INVALID_KEY_LEN;
+ if ((!reader.read(INDEX_NUMBER_SIZE))) return RDB_INVALID_KEY_LEN;
for (i = 0; i < m_key_parts; i++) {
if ((pk_key_part = m_pk_part_no[i]) != -1) {
@@ -731,8 +966,7 @@ uint Rdb_key_def::get_memcmp_sk_parts(const TABLE *table,
const char *start = reader.get_current_ptr();
// Skip the index number
- if ((!reader.read(INDEX_NUMBER_SIZE)))
- return RDB_INVALID_KEY_LEN;
+ if ((!reader.read(INDEX_NUMBER_SIZE))) return RDB_INVALID_KEY_LEN;
for (uint i = 0; i < table->key_info[m_keyno].user_defined_key_parts; i++) {
if ((res = read_memcmp_key_part(table, &reader, i)) > 0) {
@@ -772,8 +1006,7 @@ uint Rdb_key_def::pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer,
key_restore(tbl->record[0], key_tuple, &tbl->key_info[m_keyno], key_len);
uint n_used_parts = my_count_bits(keypart_map);
- if (keypart_map == HA_WHOLE_KEY)
- n_used_parts = 0; // Full key is used
+ if (keypart_map == HA_WHOLE_KEY) n_used_parts = 0; // Full key is used
/* Then, convert the record into a mem-comparable form */
return pack_record(tbl, pack_buffer, tbl->record[0], packed_tuple, nullptr,
@@ -811,7 +1044,7 @@ bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) {
/*
@return Number of bytes that were changed
*/
-int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) {
+int Rdb_key_def::successor(uchar *const packed_tuple, const uint len) {
DBUG_ASSERT(packed_tuple != nullptr);
int changed = 0;
@@ -830,7 +1063,7 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) {
/*
@return Number of bytes that were changed
*/
-int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint &len) {
+int Rdb_key_def::predecessor(uchar *const packed_tuple, const uint len) {
DBUG_ASSERT(packed_tuple != nullptr);
int changed = 0;
@@ -889,30 +1122,30 @@ void Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const {
}
switch (field->real_type()) {
- // This type may be covered depending on the record. If it was requested,
- // we require the covered bitmap to have this bit set.
- case MYSQL_TYPE_VARCHAR:
- if (curr_bitmap_pos < MAX_REF_PARTS) {
+ // This type may be covered depending on the record. If it was requested,
+ // we require the covered bitmap to have this bit set.
+ case MYSQL_TYPE_VARCHAR:
+ if (curr_bitmap_pos < MAX_REF_PARTS) {
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(map, curr_bitmap_pos);
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ }
+ curr_bitmap_pos++;
+ } else {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ // This column is a type which is never covered. If it was requested, we
+ // know this lookup will never be covered.
+ default:
if (bitmap_is_set(table->read_set, field->field_index)) {
- bitmap_set_bit(map, curr_bitmap_pos);
- bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
}
- curr_bitmap_pos++;
- } else {
- bitmap_free(&maybe_covered_bitmap);
- bitmap_free(map);
- return;
- }
- break;
- // This column is a type which is never covered. If it was requested, we
- // know this lookup will never be covered.
- default:
- if (bitmap_is_set(table->read_set, field->field_index)) {
- bitmap_free(&maybe_covered_bitmap);
- bitmap_free(map);
- return;
- }
- break;
+ break;
}
}
@@ -930,8 +1163,7 @@ void Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const {
- All values for columns that are prefix-only indexes are shorter or equal
in length to the prefix
*/
-bool Rdb_key_def::covers_lookup(TABLE *const table,
- const rocksdb::Slice *const unpack_info,
+bool Rdb_key_def::covers_lookup(const rocksdb::Slice *const unpack_info,
const MY_BITMAP *const lookup_bitmap) const {
DBUG_ASSERT(lookup_bitmap != nullptr);
if (!use_covered_bitmap_format() || lookup_bitmap->bitmap == nullptr) {
@@ -960,6 +1192,14 @@ bool Rdb_key_def::covers_lookup(TABLE *const table,
return bitmap_is_subset(lookup_bitmap, &covered_bitmap);
}
+/* Indicates that all key parts can be unpacked to cover a secondary lookup */
+bool Rdb_key_def::can_cover_lookup() const {
+ for (uint i = 0; i < m_key_parts; i++) {
+ if (!m_pack_info[i].m_covered) return false;
+ }
+ return true;
+}
+
uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
uchar *tuple, uchar *const packed_tuple,
uchar *const pack_buffer,
@@ -971,8 +1211,7 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
/* NULL value. store '\0' so that it sorts before non-NULL values */
*tuple++ = 0;
/* That's it, don't store anything else */
- if (n_null_fields)
- (*n_null_fields)++;
+ if (n_null_fields) (*n_null_fields)++;
return tuple;
} else {
/* Not a NULL value. Store '1' */
@@ -986,16 +1225,15 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
Rdb_pack_field_context pack_ctx(unpack_info);
// Set the offset for methods which do not take an offset as an argument
- DBUG_ASSERT(is_storage_available(tuple - packed_tuple,
- pack_info->m_max_image_len));
+ DBUG_ASSERT(
+ is_storage_available(tuple - packed_tuple, pack_info->m_max_image_len));
- (this->*pack_info->m_pack_func)(pack_info, field, pack_buffer, &tuple,
- &pack_ctx);
+ (pack_info->m_pack_func)(pack_info, field, pack_buffer, &tuple, &pack_ctx);
/* Make "unpack info" to be stored in the value */
if (create_unpack_info) {
- (this->*pack_info->m_make_unpack_info_func)(pack_info->m_charset_codec,
- field, &pack_ctx);
+ (pack_info->m_make_unpack_info_func)(pack_info->m_charset_codec, field,
+ &pack_ctx);
}
return tuple;
@@ -1014,8 +1252,8 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
unpack_info_len OUT Unpack data length
n_key_parts Number of keyparts to process. 0 means all of them.
n_null_fields OUT Number of key fields with NULL value.
- ttl_pk_offset OUT Offset of the ttl column if specified and in the key
-
+ ttl_bytes IN Previous ttl bytes from old record for update case or
+ current ttl bytes from just packed primary key/value
@detail
Some callers do not need the unpack information, they can pass
unpack_info=nullptr, unpack_info_len=nullptr.
@@ -1024,12 +1262,14 @@ uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
Length of the packed tuple
*/
-uint Rdb_key_def::pack_record(
- const TABLE *const tbl, uchar *const pack_buffer, const uchar *const record,
- uchar *const packed_tuple, Rdb_string_writer *const unpack_info,
- const bool &should_store_row_debug_checksums, const longlong &hidden_pk_id,
- uint n_key_parts, uint *const n_null_fields, uint *const ttl_pk_offset,
- const char *const ttl_bytes) const {
+uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
+ const uchar *const record,
+ uchar *const packed_tuple,
+ Rdb_string_writer *const unpack_info,
+ const bool should_store_row_debug_checksums,
+ const longlong hidden_pk_id, uint n_key_parts,
+ uint *const n_null_fields,
+ const char *const ttl_bytes) const {
DBUG_ASSERT(tbl != nullptr);
DBUG_ASSERT(pack_buffer != nullptr);
DBUG_ASSERT(record != nullptr);
@@ -1056,13 +1296,13 @@ uint Rdb_key_def::pack_record(
// If hidden pk exists, but hidden pk wasnt passed in, we can't pack the
// hidden key part. So we skip it (its always 1 part).
- if (hidden_pk_exists && !hidden_pk_id && use_all_columns)
+ if (hidden_pk_exists && !hidden_pk_id && use_all_columns) {
n_key_parts = m_key_parts - 1;
- else if (use_all_columns)
+ } else if (use_all_columns) {
n_key_parts = m_key_parts;
+ }
- if (n_null_fields)
- *n_null_fields = 0;
+ if (n_null_fields) *n_null_fields = 0;
// Check if we need a covered bitmap. If it is certain that all key parts are
// covering, we don't need one.
@@ -1130,18 +1370,9 @@ uint Rdb_key_def::pack_record(
uint null_offset = field->null_offset(tbl->record[0]);
bool maybe_null = field->real_maybe_null();
- // Save the ttl duration offset in the key so we can store it in front of
- // the record later.
- if (ttl_pk_offset && m_ttl_duration > 0 && i == m_ttl_pk_key_part_offset) {
- DBUG_ASSERT(field_check_field_name_match(field, m_ttl_column.c_str()));
- DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
- DBUG_ASSERT(field->key_type() == HA_KEYTYPE_ULONGLONG);
- DBUG_ASSERT(!field->real_maybe_null());
- *ttl_pk_offset = tuple - packed_tuple;
- }
-
- field->move_field(const_cast<uchar*>(record) + field_offset,
- maybe_null ? const_cast<uchar*>(record) + null_offset : nullptr,
+ field->move_field(
+ const_cast<uchar *>(record) + field_offset,
+ maybe_null ? const_cast<uchar *>(record) + null_offset : nullptr,
field->null_bit);
// WARNING! Don't return without restoring field->ptr and field->null_ptr
@@ -1226,7 +1457,7 @@ uint Rdb_key_def::pack_record(
Length of the packed tuple
*/
-uint Rdb_key_def::pack_hidden_pk(const longlong &hidden_pk_id,
+uint Rdb_key_def::pack_hidden_pk(const longlong hidden_pk_id,
uchar *const packed_tuple) const {
DBUG_ASSERT(packed_tuple != nullptr);
@@ -1250,7 +1481,7 @@ uint Rdb_key_def::pack_hidden_pk(const longlong &hidden_pk_id,
void Rdb_key_def::pack_with_make_sort_key(
Rdb_field_packing *const fpi, Field *const field,
uchar *const buf MY_ATTRIBUTE((__unused__)), uchar **dst,
- Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const {
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) {
DBUG_ASSERT(fpi != nullptr);
DBUG_ASSERT(field != nullptr);
DBUG_ASSERT(dst != nullptr);
@@ -1290,11 +1521,9 @@ int Rdb_key_def::compare_keys(const rocksdb::Slice *key1,
Rdb_string_reader reader2(key2);
// Skip the index number
- if ((!reader1.read(INDEX_NUMBER_SIZE)))
- return HA_EXIT_FAILURE;
+ if ((!reader1.read(INDEX_NUMBER_SIZE))) return HA_EXIT_FAILURE;
- if ((!reader2.read(INDEX_NUMBER_SIZE)))
- return HA_EXIT_FAILURE;
+ if ((!reader2.read(INDEX_NUMBER_SIZE))) return HA_EXIT_FAILURE;
for (uint i = 0; i < m_key_parts; i++) {
const Rdb_field_packing *const fpi = &m_pack_info[i];
@@ -1320,10 +1549,12 @@ int Rdb_key_def::compare_keys(const rocksdb::Slice *key1,
const auto before_skip1 = reader1.get_current_ptr();
const auto before_skip2 = reader2.get_current_ptr();
DBUG_ASSERT(fpi->m_skip_func);
- if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader1))
+ if ((fpi->m_skip_func)(fpi, nullptr, &reader1)) {
return HA_EXIT_FAILURE;
- if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader2))
+ }
+ if ((fpi->m_skip_func)(fpi, nullptr, &reader2)) {
return HA_EXIT_FAILURE;
+ }
const auto size1 = reader1.get_current_ptr() - before_skip1;
const auto size2 = reader2.get_current_ptr() - before_skip2;
if (size1 != size2) {
@@ -1355,48 +1586,20 @@ size_t Rdb_key_def::key_length(const TABLE *const table,
Rdb_string_reader reader(&key);
- if ((!reader.read(INDEX_NUMBER_SIZE)))
+ if ((!reader.read(INDEX_NUMBER_SIZE))) {
return size_t(-1);
-
+ }
for (uint i = 0; i < m_key_parts; i++) {
const Rdb_field_packing *fpi = &m_pack_info[i];
const Field *field = nullptr;
- if (m_index_type != INDEX_TYPE_HIDDEN_PRIMARY)
+ if (m_index_type != INDEX_TYPE_HIDDEN_PRIMARY) {
field = fpi->get_field_in_table(table);
- if ((this->*fpi->m_skip_func)(fpi, field, &reader))
- return size_t(-1);
- }
- return key.size() - reader.remaining_bytes();
-}
-
-int Rdb_key_def::unpack_field(
- Rdb_field_packing *const fpi,
- Field *const field,
- Rdb_string_reader* reader,
- const uchar *const default_value,
- Rdb_string_reader* unp_reader) const
-{
- if (fpi->m_maybe_null) {
- const char *nullp;
- if (!(nullp = reader->read(1))) {
- return HA_EXIT_FAILURE;
}
-
- if (*nullp == 0) {
- /* Set the NULL-bit of this field */
- field->set_null();
- /* Also set the field to its default value */
- memcpy(field->ptr, default_value, field->pack_length());
- return HA_EXIT_SUCCESS;
- } else if (*nullp == 1) {
- field->set_notnull();
- } else {
- return HA_EXIT_FAILURE;
+ if ((fpi->m_skip_func)(fpi, field, &reader)) {
+ return size_t(-1);
}
}
-
- return (this->*fpi->m_unpack_func)(fpi, field, field->ptr, reader,
- unp_reader);
+ return key.size() - reader.remaining_bytes();
}
/*
@@ -1413,34 +1616,37 @@ int Rdb_key_def::unpack_field(
int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
const rocksdb::Slice *const packed_key,
const rocksdb::Slice *const unpack_info,
- const bool &verify_row_debug_checksums) const {
+ const bool verify_row_debug_checksums) const {
Rdb_string_reader reader(packed_key);
Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info);
- const bool is_hidden_pk = (m_index_type == INDEX_TYPE_HIDDEN_PRIMARY);
- const bool hidden_pk_exists = table_has_hidden_pk(table);
- const bool secondary_key = (m_index_type == INDEX_TYPE_SECONDARY);
// There is no checksuming data after unpack_info for primary keys, because
// the layout there is different. The checksum is verified in
// ha_rocksdb::convert_record_from_storage_format instead.
- DBUG_ASSERT_IMP(!secondary_key, !verify_row_debug_checksums);
+ DBUG_ASSERT_IMP(!(m_index_type == INDEX_TYPE_SECONDARY),
+ !verify_row_debug_checksums);
// Skip the index number
if ((!reader.read(INDEX_NUMBER_SIZE))) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
- // For secondary keys, we expect the value field to contain unpack data and
- // checksum data in that order. One or both can be missing, but they cannot
- // be reordered.
+ // For secondary keys, we expect the value field to contain index flags,
+ // unpack data, and checksum data in that order. One or all can be missing,
+ // but they cannot be reordered.
+ if (unp_reader.remaining_bytes()) {
+ if (m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0 &&
+ !unp_reader.read(m_total_index_flags_length)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
const char *unpack_header = unp_reader.get_current_ptr();
- const bool has_unpack_info =
+ bool has_unpack_info =
unp_reader.remaining_bytes() && is_unpack_data_tag(unpack_header[0]);
if (has_unpack_info) {
- if ((m_index_type == INDEX_TYPE_SECONDARY &&
- m_total_index_flags_length > 0 &&
- !unp_reader.read(m_total_index_flags_length)) ||
- !unp_reader.read(get_unpack_header_size(unpack_header[0]))) {
+ if (!unp_reader.read(get_unpack_header_size(unpack_header[0]))) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
}
@@ -1448,9 +1654,7 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
// Read the covered bitmap
MY_BITMAP covered_bitmap;
my_bitmap_map covered_bits;
- uint curr_bitmap_pos = 0;
-
- const bool has_covered_bitmap =
+ bool has_covered_bitmap =
has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG);
if (has_covered_bitmap) {
bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
@@ -1459,87 +1663,16 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
RDB_UNPACK_COVERED_DATA_LEN_SIZE);
}
- for (uint i = 0; i < m_key_parts; i++) {
- Rdb_field_packing *const fpi = &m_pack_info[i];
-
- /*
- Hidden pk field is packed at the end of the secondary keys, but the SQL
- layer does not know about it. Skip retrieving field if hidden pk.
- */
- if ((secondary_key && hidden_pk_exists && i + 1 == m_key_parts) ||
- is_hidden_pk) {
- DBUG_ASSERT(fpi->m_unpack_func);
- if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader)) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
- continue;
- }
+ int err = HA_EXIT_SUCCESS;
- Field *const field = fpi->get_field_in_table(table);
- bool covered_column = true;
- if (has_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
- !m_pack_info[i].m_covered) {
- covered_column = curr_bitmap_pos < MAX_REF_PARTS &&
- bitmap_is_set(&covered_bitmap, curr_bitmap_pos);
- curr_bitmap_pos++;
- }
- if (fpi->m_unpack_func && covered_column) {
- /* It is possible to unpack this column. Do it. */
-
- uint field_offset = field->ptr - table->record[0];
- uint null_offset = field->null_offset();
- bool maybe_null = field->real_maybe_null();
- field->move_field(buf + field_offset,
- maybe_null ? buf + null_offset : nullptr,
- field->null_bit);
- // WARNING! Don't return without restoring field->ptr and field->null_ptr
-
- // If we need unpack info, but there is none, tell the unpack function
- // this by passing unp_reader as nullptr. If we never read unpack_info
- // during unpacking anyway, then there won't an error.
- const bool maybe_missing_unpack =
- !has_unpack_info && fpi->uses_unpack_info();
- int res = unpack_field(fpi, field, &reader,
- table->s->default_values + field_offset,
- maybe_missing_unpack ? nullptr : &unp_reader);
-
- // Restore field->ptr and field->null_ptr
- field->move_field(table->record[0] + field_offset,
- maybe_null ? table->record[0] + null_offset : nullptr,
- field->null_bit);
-
- if (res != UNPACK_SUCCESS) {
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
- } else {
- /* It is impossible to unpack the column. Skip it. */
- if (fpi->m_maybe_null) {
- const char *nullp;
- if (!(nullp = reader.read(1)))
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- if (*nullp == 0) {
- /* This is a NULL value */
- continue;
- }
- /* If NULL marker is not '0', it can be only '1' */
- if (*nullp != 1)
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
- }
- if ((this->*fpi->m_skip_func)(fpi, field, &reader))
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
-
- // If this is a space padded varchar, we need to skip the indicator
- // bytes for trailing bytes. They're useless since we can't restore the
- // field anyway.
- //
- // There is a special case for prefixed varchars where we do not
- // generate unpack info, because we know prefixed varchars cannot be
- // unpacked. In this case, it is not necessary to skip.
- if (fpi->m_skip_func == &Rdb_key_def::skip_variable_space_pad &&
- !fpi->m_unpack_info_stores_value) {
- unp_reader.read(fpi->m_unpack_info_uses_two_bytes ? 2 : 1);
- }
+ Rdb_key_field_iterator iter(
+ this, m_pack_info, &reader, &unp_reader, table, has_unpack_info,
+ has_covered_bitmap ? &covered_bitmap : nullptr, buf);
+ while (iter.has_next()) {
+ err = iter.next();
+ if (err) {
+ return err;
}
}
@@ -1578,8 +1711,7 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
}
}
- if (reader.remaining_bytes())
- return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ if (reader.remaining_bytes()) return HA_ERR_ROCKSDB_CORRUPT_DATA;
return HA_EXIT_SUCCESS;
}
@@ -1588,7 +1720,7 @@ bool Rdb_key_def::table_has_hidden_pk(const TABLE *const table) {
return table->s->primary_key == MAX_INDEXES;
}
-void Rdb_key_def::report_checksum_mismatch(const bool &is_key,
+void Rdb_key_def::report_checksum_mismatch(const bool is_key,
const char *const data,
const size_t data_size) const {
// NO_LINT_DEBUG
@@ -1603,17 +1735,17 @@ void Rdb_key_def::report_checksum_mismatch(const bool &is_key,
my_error(ER_INTERNAL_ERROR, MYF(0), "Record checksum mismatch");
}
-bool Rdb_key_def::index_format_min_check(const int &pk_min,
- const int &sk_min) const {
+bool Rdb_key_def::index_format_min_check(const int pk_min,
+ const int sk_min) const {
switch (m_index_type) {
- case INDEX_TYPE_PRIMARY:
- case INDEX_TYPE_HIDDEN_PRIMARY:
- return (m_kv_format_version >= pk_min);
- case INDEX_TYPE_SECONDARY:
- return (m_kv_format_version >= sk_min);
- default:
- DBUG_ASSERT(0);
- return false;
+ case INDEX_TYPE_PRIMARY:
+ case INDEX_TYPE_HIDDEN_PRIMARY:
+ return (m_kv_format_version >= pk_min);
+ case INDEX_TYPE_SECONDARY:
+ return (m_kv_format_version >= sk_min);
+ default:
+ DBUG_ASSERT(0);
+ return false;
}
}
@@ -1628,9 +1760,8 @@ bool Rdb_key_def::index_format_min_check(const int &pk_min,
int Rdb_key_def::skip_max_length(const Rdb_field_packing *const fpi,
const Field *const field
MY_ATTRIBUTE((__unused__)),
- Rdb_string_reader *const reader) const {
- if (!reader->read(fpi->m_max_image_len))
- return HA_EXIT_FAILURE;
+ Rdb_string_reader *const reader) {
+ if (!reader->read(fpi->m_max_image_len)) return HA_EXIT_FAILURE;
return HA_EXIT_SUCCESS;
}
@@ -1639,27 +1770,26 @@ int Rdb_key_def::skip_max_length(const Rdb_field_packing *const fpi,
split in the middle of an UTF-8 character. See the implementation of
unpack_binary_or_utf8_varchar.
*/
-
#define RDB_ESCAPE_LENGTH 9
#define RDB_LEGACY_ESCAPE_LENGTH RDB_ESCAPE_LENGTH
static_assert((RDB_ESCAPE_LENGTH - 1) % 2 == 0,
"RDB_ESCAPE_LENGTH-1 must be even.");
-#define RDB_ENCODED_SIZE(len) \
- ((len + (RDB_ESCAPE_LENGTH - 2)) / (RDB_ESCAPE_LENGTH - 1)) * \
+#define RDB_ENCODED_SIZE(len) \
+ ((len + (RDB_ESCAPE_LENGTH - 2)) / (RDB_ESCAPE_LENGTH - 1)) * \
RDB_ESCAPE_LENGTH
-#define RDB_LEGACY_ENCODED_SIZE(len) \
- ((len + (RDB_LEGACY_ESCAPE_LENGTH - 1)) / (RDB_LEGACY_ESCAPE_LENGTH - 1)) * \
+#define RDB_LEGACY_ENCODED_SIZE(len) \
+ ((len + (RDB_LEGACY_ESCAPE_LENGTH - 1)) / (RDB_LEGACY_ESCAPE_LENGTH - 1)) * \
RDB_LEGACY_ESCAPE_LENGTH
/*
Function of type rdb_index_field_skip_t
*/
-int Rdb_key_def::skip_variable_length(
- const Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_string_reader *const reader) const {
+int Rdb_key_def::skip_variable_length(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader) {
const uchar *ptr;
bool finished = false;
@@ -1672,7 +1802,7 @@ int Rdb_key_def::skip_variable_length(
dst_len = UINT_MAX;
}
- bool use_legacy_format = use_legacy_varbinary_format();
+ bool use_legacy_format = fpi->m_use_legacy_varbinary_format;
/* Decode the length-emitted encoding here */
while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) {
@@ -1713,9 +1843,9 @@ const int VARCHAR_CMP_GREATER_THAN_SPACES = 3;
Skip a keypart that uses Variable-Length Space-Padded encoding
*/
-int Rdb_key_def::skip_variable_space_pad(
- const Rdb_field_packing *const fpi, const Field *const field,
- Rdb_string_reader *const reader) const {
+int Rdb_key_def::skip_variable_space_pad(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader) {
const uchar *ptr;
bool finished = false;
@@ -1760,31 +1890,33 @@ int Rdb_key_def::skip_variable_space_pad(
int Rdb_key_def::unpack_integer(
Rdb_field_packing *const fpi, Field *const field, uchar *const to,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
const int length = fpi->m_max_image_len;
const uchar *from;
- if (!(from = (const uchar *)reader->read(length)))
+ if (!(from = (const uchar *)reader->read(length))) {
return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
+ }
#ifdef WORDS_BIGENDIAN
{
- if (((Field_num *)field)->unsigned_flag)
+ if (static_cast<Field_num *>(field)->unsigned_flag) {
to[0] = from[0];
- else
- to[0] = (char)(from[0] ^ 128); // Reverse the sign bit.
+ } else {
+ to[0] = static_cast<char>(from[0] ^ 128); // Reverse the sign bit.
+ }
memcpy(to + 1, from + 1, length - 1);
}
#else
{
const int sign_byte = from[0];
- if (((Field_num *)field)->unsigned_flag)
+ if (static_cast<Field_num *>(field)->unsigned_flag) {
to[length - 1] = sign_byte;
- else
+ } else {
to[length - 1] =
- static_cast<char>(sign_byte ^ 128); // Reverse the sign bit.
- for (int i = 0, j = length - 1; i < length - 1; ++i, --j)
- to[i] = from[j];
+ static_cast<char>(sign_byte ^ 128); // Reverse the sign bit.
+ }
+ for (int i = 0, j = length - 1; i < length - 1; ++i, --j) to[i] = from[j];
}
#endif
return UNPACK_SUCCESS;
@@ -1826,13 +1958,14 @@ static void rdb_swap_float_bytes(uchar *const dst, const uchar *const src) {
#endif
int Rdb_key_def::unpack_floating_point(
- uchar *const dst, Rdb_string_reader *const reader, const size_t &size,
- const int &exp_digit, const uchar *const zero_pattern,
- const uchar *const zero_val,
- void (*swap_func)(uchar *, const uchar *)) const {
+ uchar *const dst, Rdb_string_reader *const reader, const size_t size,
+ const int exp_digit, const uchar *const zero_pattern,
+ const uchar *const zero_val, void (*swap_func)(uchar *, const uchar *)) {
const uchar *const from = (const uchar *)reader->read(size);
- if (from == nullptr)
- return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
+ if (from == nullptr) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
/* Check to see if the value is zero */
if (memcmp(from, zero_pattern, size) == 0) {
@@ -1854,15 +1987,14 @@ int Rdb_key_def::unpack_floating_point(
// If the high bit is set the original value was positive so
// remove the high bit and subtract one from the exponent.
ushort exp_part = ((ushort)tmp[0] << 8) | (ushort)tmp[1];
- exp_part &= 0x7FFF; // clear high bit;
- exp_part -= (ushort)1 << (16 - 1 - exp_digit); // subtract from exponent
+ exp_part &= 0x7FFF; // clear high bit;
+ exp_part -= (ushort)1 << (16 - 1 - exp_digit); // subtract from exponent
tmp[0] = (uchar)(exp_part >> 8);
tmp[1] = (uchar)exp_part;
} else {
// Otherwise the original value was negative and all bytes have been
// negated.
- for (size_t ii = 0; ii < size; ii++)
- tmp[ii] ^= 0xFF;
+ for (size_t ii = 0; ii < size; ii++) tmp[ii] ^= 0xFF;
}
#if !defined(WORDS_BIGENDIAN)
@@ -1891,7 +2023,7 @@ int Rdb_key_def::unpack_double(
Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
static double zero_val = 0.0;
static const uchar zero_pattern[8] = {128, 0, 0, 0, 0, 0, 0, 0};
@@ -1915,7 +2047,7 @@ int Rdb_key_def::unpack_double(
int Rdb_key_def::unpack_float(
Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)),
uchar *const field_ptr, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
static float zero_val = 0.0;
static const uchar zero_pattern[4] = {128, 0, 0, 0};
@@ -1932,12 +2064,14 @@ int Rdb_key_def::unpack_float(
int Rdb_key_def::unpack_newdate(
Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)),
uchar *const field_ptr, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
const char *from;
DBUG_ASSERT(fpi->m_max_image_len == 3);
- if (!(from = reader->read(3)))
- return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
+ if (!(from = reader->read(3))) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
field_ptr[0] = from[2];
field_ptr[1] = from[1];
@@ -1954,10 +2088,12 @@ int Rdb_key_def::unpack_newdate(
int Rdb_key_def::unpack_binary_str(
Rdb_field_packing *const fpi, Field *const field, uchar *const to,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
const char *from;
- if (!(from = reader->read(fpi->m_max_image_len)))
- return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
+ if (!(from = reader->read(fpi->m_max_image_len))) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
memcpy(to, from, fpi->m_max_image_len);
return UNPACK_SUCCESS;
@@ -1972,11 +2108,13 @@ int Rdb_key_def::unpack_binary_str(
int Rdb_key_def::unpack_utf8_str(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
my_core::CHARSET_INFO *const cset = (my_core::CHARSET_INFO *)field->charset();
const uchar *src;
- if (!(src = (const uchar *)reader->read(fpi->m_max_image_len)))
- return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
+ if (!(src = (const uchar *)reader->read(fpi->m_max_image_len))) {
+ /* Mem-comparable image doesn't have enough bytes */
+ return UNPACK_FAILURE;
+ }
const uchar *const src_end = src + fpi->m_max_image_len;
uchar *const dst_end = dst + field->pack_length();
@@ -1986,8 +2124,7 @@ int Rdb_key_def::unpack_utf8_str(
src += 2;
int res = cset->cset->wc_mb(cset, wc, dst, dst_end);
DBUG_ASSERT(res > 0 && res <= 3);
- if (res < 0)
- return UNPACK_FAILURE;
+ if (res < 0) return UNPACK_FAILURE;
dst += res;
}
@@ -2015,9 +2152,9 @@ int Rdb_key_def::unpack_utf8_str(
See pack_variable_format for the newer algorithm.
*/
void Rdb_key_def::pack_legacy_variable_format(
- const uchar *src, // The data to encode
- size_t src_len, // The length of the data to encode
- uchar **dst) const // The location to encode the data
+ const uchar *src, // The data to encode
+ size_t src_len, // The length of the data to encode
+ uchar **dst) // The location to encode the data
{
size_t copy_len;
size_t padding_bytes;
@@ -2067,9 +2204,9 @@ void Rdb_key_def::pack_legacy_variable_format(
- 10 bytes is encoded as X X X X X X X X 9 X X 0 0 0 0 0 0 2
*/
void Rdb_key_def::pack_variable_format(
- const uchar *src, // The data to encode
- size_t src_len, // The length of the data to encode
- uchar **dst) const // The location to encode the data
+ const uchar *src, // The data to encode
+ size_t src_len, // The length of the data to encode
+ uchar **dst) // The location to encode the data
{
uchar *ptr = *dst;
@@ -2108,7 +2245,7 @@ void Rdb_key_def::pack_variable_format(
void Rdb_key_def::pack_with_varchar_encoding(
Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
- Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const {
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) {
const CHARSET_INFO *const charset = field->charset();
Field_varstring *const field_var = (Field_varstring *)field;
@@ -2120,7 +2257,7 @@ void Rdb_key_def::pack_with_varchar_encoding(
field_var->ptr + field_var->length_bytes, value_length, 0);
/* Got a mem-comparable image in 'buf'. Now, produce varlength encoding */
- if (use_legacy_varbinary_format()) {
+ if (fpi->m_use_legacy_varbinary_format) {
pack_legacy_variable_format(buf, xfrm_len, dst);
} else {
pack_variable_format(buf, xfrm_len, dst);
@@ -2132,14 +2269,13 @@ void Rdb_key_def::pack_with_varchar_encoding(
sequence of strings in space_xfrm
*/
-static int
-rdb_compare_string_with_spaces(const uchar *buf, const uchar *const buf_end,
- const std::vector<uchar> *const space_xfrm) {
+static int rdb_compare_string_with_spaces(
+ const uchar *buf, const uchar *const buf_end,
+ const std::vector<uchar> *const space_xfrm) {
int cmp = 0;
while (buf < buf_end) {
size_t bytes = std::min((size_t)(buf_end - buf), space_xfrm->size());
- if ((cmp = memcmp(buf, space_xfrm->data(), bytes)) != 0)
- break;
+ if ((cmp = memcmp(buf, space_xfrm->data(), bytes)) != 0) break;
buf += bytes;
}
return cmp;
@@ -2219,7 +2355,7 @@ static const int RDB_TRIMMED_CHARS_OFFSET = 8;
void Rdb_key_def::pack_with_varchar_space_pad(
Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
- Rdb_pack_field_context *const pack_ctx) const {
+ Rdb_pack_field_context *const pack_ctx) {
Rdb_string_writer *const unpack_info = pack_ctx->writer;
const CHARSET_INFO *const charset = field->charset();
const auto field_var = static_cast<Field_varstring *>(field);
@@ -2252,7 +2388,7 @@ void Rdb_key_def::pack_with_varchar_space_pad(
if (padding_bytes) {
memcpy(ptr, fpi->space_xfrm->data(), padding_bytes);
ptr += padding_bytes;
- *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; // last segment
+ *ptr = VARCHAR_CMP_EQUAL_TO_SPACES; // last segment
} else {
// Compare the string suffix with a hypothetical infinite string of
// spaces. It could be that the first difference is beyond the end of
@@ -2260,19 +2396,18 @@ void Rdb_key_def::pack_with_varchar_space_pad(
const int cmp =
rdb_compare_string_with_spaces(buf, buf_end, fpi->space_xfrm);
- if (cmp < 0)
+ if (cmp < 0) {
*ptr = VARCHAR_CMP_LESS_THAN_SPACES;
- else if (cmp > 0)
+ } else if (cmp > 0) {
*ptr = VARCHAR_CMP_GREATER_THAN_SPACES;
- else {
+ } else {
// It turns out all the rest are spaces.
*ptr = VARCHAR_CMP_EQUAL_TO_SPACES;
}
}
encoded_size += fpi->m_segment_size;
- if (*(ptr++) == VARCHAR_CMP_EQUAL_TO_SPACES)
- break;
+ if (*(ptr++) == VARCHAR_CMP_EQUAL_TO_SPACES) break;
}
// m_unpack_info_stores_value means unpack_info stores the whole original
@@ -2305,8 +2440,7 @@ void Rdb_key_def::pack_with_varchar_space_pad(
last chunk in the input. This is based on the old legacy format - see
pack_legacy_variable_format.
*/
-uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag,
- bool *done) const {
+uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag, bool *done) {
uint pad = 255 - flag;
uint used_bytes = RDB_LEGACY_ESCAPE_LENGTH - 1 - pad;
if (used_bytes > RDB_LEGACY_ESCAPE_LENGTH - 1) {
@@ -2322,7 +2456,7 @@ uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag,
last chunk in the input. This is based on the new format - see
pack_variable_format.
*/
-uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) const {
+uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) {
// Check for invalid flag values
if (flag > RDB_ESCAPE_LENGTH) {
return (uint)-1;
@@ -2345,13 +2479,13 @@ uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) const {
treated as a wide-character and converted to its multibyte equivalent in
the output.
*/
-static int
-unpack_charset(const CHARSET_INFO *cset, // character set information
- const uchar *src, // source data to unpack
- uint src_len, // length of source data
- uchar *dst, // destination of unpacked data
- uint dst_len, // length of destination data
- uint *used_bytes) // output number of bytes used
+static int unpack_charset(
+ const CHARSET_INFO *cset, // character set information
+ const uchar *src, // source data to unpack
+ uint src_len, // length of source data
+ uchar *dst, // destination of unpacked data
+ uint dst_len, // length of destination data
+ uint *used_bytes) // output number of bytes used
{
if (src_len & 1) {
/*
@@ -2386,7 +2520,7 @@ unpack_charset(const CHARSET_INFO *cset, // character set information
int Rdb_key_def::unpack_binary_or_utf8_varchar(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
const uchar *ptr;
size_t len = 0;
bool finished = false;
@@ -2396,7 +2530,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar(
// How much we can unpack
size_t dst_len = field_var->pack_length() - field_var->length_bytes;
- bool use_legacy_format = use_legacy_varbinary_format();
+ bool use_legacy_format = fpi->m_use_legacy_varbinary_format;
/* Decode the length-emitted encoding here */
while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) {
@@ -2460,8 +2594,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar(
*/
int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
- Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const {
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) {
const uchar *ptr;
size_t len = 0;
bool finished = false;
@@ -2482,8 +2615,9 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
space_padding_bytes =
-(static_cast<int>(extra_spaces) - RDB_TRIMMED_CHARS_OFFSET);
extra_spaces = 0;
- } else
+ } else {
extra_spaces -= RDB_TRIMMED_CHARS_OFFSET;
+ }
space_padding_bytes *= fpi->space_xfrm_len;
@@ -2491,16 +2625,17 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) {
const char last_byte = ptr[fpi->m_segment_size - 1];
size_t used_bytes;
- if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) // this is the last segment
+ if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) // this is the last segment
{
- if (space_padding_bytes > (fpi->m_segment_size - 1))
- return UNPACK_FAILURE; // Cannot happen, corrupted data
+ if (space_padding_bytes > (fpi->m_segment_size - 1)) {
+ return UNPACK_FAILURE; // Cannot happen, corrupted data
+ }
used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes;
finished = true;
} else {
if (last_byte != VARCHAR_CMP_LESS_THAN_SPACES &&
last_byte != VARCHAR_CMP_GREATER_THAN_SPACES) {
- return UNPACK_FAILURE; // Invalid value
+ return UNPACK_FAILURE; // Invalid value
}
used_bytes = fpi->m_segment_size - 1;
}
@@ -2523,14 +2658,12 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
const CHARSET_INFO *cset = fpi->m_varchar_charset;
int res = cset->cset->wc_mb(cset, wc, dst, dst_end);
DBUG_ASSERT(res <= 3);
- if (res <= 0)
- return UNPACK_FAILURE;
+ if (res <= 0) return UNPACK_FAILURE;
dst += res;
len += res;
}
} else {
- if (dst + used_bytes > dst_end)
- return UNPACK_FAILURE;
+ if (dst + used_bytes > dst_end) return UNPACK_FAILURE;
memcpy(dst, ptr, used_bytes);
dst += used_bytes;
len += used_bytes;
@@ -2540,8 +2673,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
if (extra_spaces) {
// Both binary and UTF-8 charset store space as ' ',
// so the following is ok:
- if (dst + extra_spaces > dst_end)
- return UNPACK_FAILURE;
+ if (dst + extra_spaces > dst_end) return UNPACK_FAILURE;
memset(dst, fpi->m_varchar_charset->pad_char, extra_spaces);
len += extra_spaces;
}
@@ -2549,8 +2681,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
}
}
- if (!finished)
- return UNPACK_FAILURE;
+ if (!finished) return UNPACK_FAILURE;
/* Save the length */
if (field_var->length_bytes == 1) {
@@ -2570,7 +2701,7 @@ int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
void Rdb_key_def::make_unpack_unknown(
const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_pack_field_context *const pack_ctx) const {
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) {
pack_ctx->writer->write(field->ptr, field->pack_length());
}
@@ -2585,7 +2716,7 @@ void Rdb_key_def::make_unpack_unknown(
void Rdb_key_def::dummy_make_unpack_info(
const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
const Field *field MY_ATTRIBUTE((__unused__)),
- Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) const {
+ Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) {
// Do nothing
}
@@ -2596,7 +2727,7 @@ void Rdb_key_def::dummy_make_unpack_info(
int Rdb_key_def::unpack_unknown(Rdb_field_packing *const fpi,
Field *const field, uchar *const dst,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const {
+ Rdb_string_reader *const unp_reader) {
const uchar *ptr;
const uint len = fpi->m_unpack_data_len;
// We don't use anything from the key, so skip over it.
@@ -2619,7 +2750,7 @@ int Rdb_key_def::unpack_unknown(Rdb_field_packing *const fpi,
void Rdb_key_def::make_unpack_unknown_varchar(
const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_pack_field_context *const pack_ctx) const {
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) {
const auto f = static_cast<const Field_varstring *>(field);
uint len = f->length_bytes == 1 ? (uint)*f->ptr : uint2korr(f->ptr);
len += f->length_bytes;
@@ -2640,17 +2771,17 @@ void Rdb_key_def::make_unpack_unknown_varchar(
make_unpack_unknown, unpack_unknown
*/
-int Rdb_key_def::unpack_unknown_varchar(
- Rdb_field_packing *const fpi, Field *const field, uchar *dst,
- Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const {
+int Rdb_key_def::unpack_unknown_varchar(Rdb_field_packing *const fpi,
+ Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) {
const uchar *ptr;
uchar *const d0 = dst;
const auto f = static_cast<Field_varstring *>(field);
dst += f->length_bytes;
const uint len_bytes = f->length_bytes;
// We don't use anything from the key, so skip over it.
- if ((this->*fpi->m_skip_func)(fpi, field, reader)) {
+ if ((fpi->m_skip_func)(fpi, field, reader)) {
return UNPACK_FAILURE;
}
@@ -2682,8 +2813,8 @@ static void rdb_write_unpack_simple(Rdb_bit_writer *const writer,
static uint rdb_read_unpack_simple(Rdb_bit_reader *const reader,
const Rdb_collation_codec *const codec,
- const uchar *const src,
- const size_t &src_len, uchar *const dst) {
+ const uchar *const src, const size_t src_len,
+ uchar *const dst) {
for (uint i = 0; i < src_len; i++) {
if (codec->m_dec_size[src[i]] > 0) {
uint *ret;
@@ -2710,7 +2841,7 @@ static uint rdb_read_unpack_simple(Rdb_bit_reader *const reader,
void Rdb_key_def::make_unpack_simple_varchar(
const Rdb_collation_codec *const codec, const Field *const field,
- Rdb_pack_field_context *const pack_ctx) const {
+ Rdb_pack_field_context *const pack_ctx) {
const auto f = static_cast<const Field_varstring *>(field);
uchar *const src = f->ptr + f->length_bytes;
const size_t src_len =
@@ -2732,8 +2863,7 @@ void Rdb_key_def::make_unpack_simple_varchar(
int Rdb_key_def::unpack_simple_varchar_space_pad(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
- Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const {
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) {
const uchar *ptr;
size_t len = 0;
bool finished = false;
@@ -2759,20 +2889,22 @@ int Rdb_key_def::unpack_simple_varchar_space_pad(
if (extra_spaces <= 8) {
space_padding_bytes = -(static_cast<int>(extra_spaces) - 8);
extra_spaces = 0;
- } else
+ } else {
extra_spaces -= 8;
+ }
space_padding_bytes *= fpi->space_xfrm_len;
/* Decode the length-emitted encoding here */
while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) {
const char last_byte =
- ptr[fpi->m_segment_size - 1]; // number of padding bytes
+ ptr[fpi->m_segment_size - 1]; // number of padding bytes
size_t used_bytes;
if (last_byte == VARCHAR_CMP_EQUAL_TO_SPACES) {
// this is the last one
- if (space_padding_bytes > (fpi->m_segment_size - 1))
- return UNPACK_FAILURE; // Cannot happen, corrupted data
+ if (space_padding_bytes > (fpi->m_segment_size - 1)) {
+ return UNPACK_FAILURE; // Cannot happen, corrupted data
+ }
used_bytes = (fpi->m_segment_size - 1) - space_padding_bytes;
finished = true;
} else {
@@ -2799,8 +2931,7 @@ int Rdb_key_def::unpack_simple_varchar_space_pad(
if (finished) {
if (extra_spaces) {
- if (dst + extra_spaces > dst_end)
- return UNPACK_FAILURE;
+ if (dst + extra_spaces > dst_end) return UNPACK_FAILURE;
// pad_char has a 1-byte form in all charsets that
// are handled by rdb_init_collation_mapping.
memset(dst, field_var->charset()->pad_char, extra_spaces);
@@ -2810,8 +2941,7 @@ int Rdb_key_def::unpack_simple_varchar_space_pad(
}
}
- if (!finished)
- return UNPACK_FAILURE;
+ if (!finished) return UNPACK_FAILURE;
/* Save the length */
if (field_var->length_bytes == 1) {
@@ -2834,9 +2964,9 @@ int Rdb_key_def::unpack_simple_varchar_space_pad(
The VARCHAR variant is in make_unpack_simple_varchar
*/
-void Rdb_key_def::make_unpack_simple(
- const Rdb_collation_codec *const codec, const Field *const field,
- Rdb_pack_field_context *const pack_ctx) const {
+void Rdb_key_def::make_unpack_simple(const Rdb_collation_codec *const codec,
+ const Field *const field,
+ Rdb_pack_field_context *const pack_ctx) {
const uchar *const src = field->ptr;
Rdb_bit_writer bit_writer(pack_ctx->writer);
rdb_write_unpack_simple(&bit_writer, codec, src, field->pack_length());
@@ -2850,7 +2980,7 @@ int Rdb_key_def::unpack_simple(Rdb_field_packing *const fpi,
Field *const field MY_ATTRIBUTE((__unused__)),
uchar *const dst,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const {
+ Rdb_string_reader *const unp_reader) {
const uchar *ptr;
const uint len = fpi->m_max_image_len;
Rdb_bit_reader bit_reader(unp_reader);
@@ -2869,7 +2999,7 @@ const int RDB_SPACE_XFRM_SIZE = 32;
// A class holding information about how space character is represented in a
// charset.
class Rdb_charset_space_info {
-public:
+ public:
Rdb_charset_space_info(const Rdb_charset_space_info &) = delete;
Rdb_charset_space_info &operator=(const Rdb_charset_space_info &) = delete;
Rdb_charset_space_info() = default;
@@ -2927,16 +3057,17 @@ static void rdb_get_mem_comparable_space(const CHARSET_INFO *const cs,
const size_t space_mb_len = cs->cset->wc_mb(
cs, (my_wc_t)cs->pad_char, space_mb, space_mb + sizeof(space_mb));
- uchar space[20]; // mem-comparable image of the space character
+ // mem-comparable image of the space character
+ std::array<uchar, 20> space;
- const size_t space_len = cs->coll->strnxfrm(cs, space, sizeof(space), 1,
- space_mb, space_mb_len, 0);
+ const size_t space_len = cs->coll->strnxfrm(
+ cs, space.data(), sizeof(space), 1, space_mb, space_mb_len, 0);
Rdb_charset_space_info *const info = new Rdb_charset_space_info;
info->space_xfrm_len = space_len;
info->space_mb_len = space_mb_len;
while (info->spaces_xfrm.size() < RDB_SPACE_XFRM_SIZE) {
- info->spaces_xfrm.insert(info->spaces_xfrm.end(), space,
- space + space_len);
+ info->spaces_xfrm.insert(info->spaces_xfrm.end(), space.data(),
+ space.data() + space_len);
}
rdb_mem_comparable_space[cs->number].reset(info);
}
@@ -2959,8 +3090,8 @@ bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs) {
!(cs->state & (MY_CS_BINSORT | MY_CS_NOPAD));
}
-static const Rdb_collation_codec *
-rdb_init_collation_mapping(const my_core::CHARSET_INFO *const cs) {
+static const Rdb_collation_codec *rdb_init_collation_mapping(
+ const my_core::CHARSET_INFO *const cs) {
DBUG_ASSERT(cs && cs->state & MY_CS_AVAILABLE);
const Rdb_collation_codec *codec = rdb_collation_data[cs->number];
@@ -2996,11 +3127,10 @@ rdb_init_collation_mapping(const my_core::CHARSET_INFO *const cs) {
}
}
- cur->m_make_unpack_info_func = {
- &Rdb_key_def::make_unpack_simple_varchar,
- &Rdb_key_def::make_unpack_simple};
- cur->m_unpack_func = {&Rdb_key_def::unpack_simple_varchar_space_pad,
- &Rdb_key_def::unpack_simple};
+ cur->m_make_unpack_info_func = {Rdb_key_def::make_unpack_simple_varchar,
+ Rdb_key_def::make_unpack_simple};
+ cur->m_unpack_func = {Rdb_key_def::unpack_simple_varchar_space_pad,
+ Rdb_key_def::unpack_simple};
} else {
// Out of luck for now.
}
@@ -3069,9 +3199,9 @@ static int get_segment_size_from_collation(const CHARSET_INFO *const cs) {
*/
bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
- const Field *const field, const uint &keynr_arg,
- const uint &key_part_arg,
- const uint16 &key_length) {
+ const Field *const field, const uint keynr_arg,
+ const uint key_part_arg,
+ const uint16 key_length) {
int res = false;
enum_field_types type = field ? field->real_type() : MYSQL_TYPE_LONGLONG;
@@ -3082,86 +3212,96 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
m_unpack_func = nullptr;
m_make_unpack_info_func = nullptr;
m_unpack_data_len = 0;
- space_xfrm = nullptr; // safety
-
+ space_xfrm = nullptr; // safety
+ // whether to use legacy format for varchar
+ m_use_legacy_varbinary_format = false;
+ // ha_rocksdb::index_flags() will pass key_descr == null to
+ // see whether field(column) can be read-only reads through return value,
+ // but the legacy vs. new varchar format doesn't affect return value.
+ // Just change m_use_legacy_varbinary_format to true if key_descr isn't given.
+ if (!key_descr || key_descr->use_legacy_varbinary_format()) {
+ m_use_legacy_varbinary_format = true;
+ }
/* Calculate image length. By default, is is pack_length() */
m_max_image_len =
field ? field->pack_length() : ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN;
- m_skip_func = &Rdb_key_def::skip_max_length;
- m_pack_func = &Rdb_key_def::pack_with_make_sort_key;
+ m_skip_func = Rdb_key_def::skip_max_length;
+ m_pack_func = Rdb_key_def::pack_with_make_sort_key;
m_covered = false;
switch (type) {
- case MYSQL_TYPE_LONGLONG:
- case MYSQL_TYPE_LONG:
- case MYSQL_TYPE_INT24:
- case MYSQL_TYPE_SHORT:
- case MYSQL_TYPE_TINY:
- m_unpack_func = &Rdb_key_def::unpack_integer;
- m_covered = true;
- return true;
-
- case MYSQL_TYPE_DOUBLE:
- m_unpack_func = &Rdb_key_def::unpack_double;
- m_covered = true;
- return true;
+ case MYSQL_TYPE_LONGLONG:
+ case MYSQL_TYPE_LONG:
+ case MYSQL_TYPE_INT24:
+ case MYSQL_TYPE_SHORT:
+ case MYSQL_TYPE_TINY:
+ m_unpack_func = Rdb_key_def::unpack_integer;
+ m_covered = true;
+ return true;
- case MYSQL_TYPE_FLOAT:
- m_unpack_func = &Rdb_key_def::unpack_float;
- m_covered = true;
- return true;
+ case MYSQL_TYPE_DOUBLE:
+ m_unpack_func = Rdb_key_def::unpack_double;
+ m_covered = true;
+ return true;
- case MYSQL_TYPE_NEWDECIMAL:
- /*
- Decimal is packed with Field_new_decimal::make_sort_key, which just
- does memcpy.
- Unpacking decimal values was supported only after fix for issue#253,
- because of that ha_rocksdb::get_storage_type() handles decimal values
- in a special way.
- */
- case MYSQL_TYPE_DATETIME2:
- case MYSQL_TYPE_TIMESTAMP2:
- /* These are packed with Field_temporal_with_date_and_timef::make_sort_key */
- case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */
- case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */
- /* Everything that comes here is packed with just a memcpy(). */
- m_unpack_func = &Rdb_key_def::unpack_binary_str;
- m_covered = true;
- return true;
+ case MYSQL_TYPE_FLOAT:
+ m_unpack_func = Rdb_key_def::unpack_float;
+ m_covered = true;
+ return true;
- case MYSQL_TYPE_NEWDATE:
+ case MYSQL_TYPE_NEWDECIMAL:
/*
- This is packed by Field_newdate::make_sort_key. It assumes the data is
- 3 bytes, and packing is done by swapping the byte order (for both big-
- and little-endian)
+ Decimal is packed with Field_new_decimal::make_sort_key, which just
+ does memcpy.
+ Unpacking decimal values was supported only after fix for issue#253,
+ because of that ha_rocksdb::get_storage_type() handles decimal values
+ in a special way.
*/
- m_unpack_func = &Rdb_key_def::unpack_newdate;
- m_covered = true;
- return true;
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- case MYSQL_TYPE_BLOB: {
- if (key_descr) {
- // The my_charset_bin collation is special in that it will consider
- // shorter strings sorting as less than longer strings.
- //
- // See Field_blob::make_sort_key for details.
- m_max_image_len =
+ case MYSQL_TYPE_DATETIME2:
+ case MYSQL_TYPE_TIMESTAMP2:
+ /* These are packed with Field_temporal_with_date_and_timef::make_sort_key
+ */
+ case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */
+ case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */
+ /* Everything that comes here is packed with just a memcpy(). */
+ m_unpack_func = Rdb_key_def::unpack_binary_str;
+ m_covered = true;
+ return true;
+
+ case MYSQL_TYPE_NEWDATE:
+ /*
+ This is packed by Field_newdate::make_sort_key. It assumes the data is
+ 3 bytes, and packing is done by swapping the byte order (for both big-
+ and little-endian)
+ */
+ m_unpack_func = Rdb_key_def::unpack_newdate;
+ m_covered = true;
+ return true;
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_BLOB: {
+ if (key_descr) {
+ // The my_charset_bin collation is special in that it will consider
+ // shorter strings sorting as less than longer strings.
+ //
+ // See Field_blob::make_sort_key for details.
+ m_max_image_len =
key_length + (field->charset()->number == COLLATION_BINARY
- ? reinterpret_cast<const Field_blob *>(field)
- ->pack_length_no_ptr()
- : 0);
- // Return false because indexes on text/blob will always require
- // a prefix. With a prefix, the optimizer will not be able to do an
- // index-only scan since there may be content occuring after the prefix
- // length.
- return false;
+ ? reinterpret_cast<const Field_blob *>(field)
+ ->pack_length_no_ptr()
+ : 0);
+ // Return false because indexes on text/blob will always require
+ // a prefix. With a prefix, the optimizer will not be able to do an
+ // index-only scan since there may be content occuring after the prefix
+ // length.
+ return false;
+ }
+ break;
}
- }
- default:
- break;
+ default:
+ break;
}
m_unpack_info_stores_value = false;
@@ -3184,8 +3324,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// The default for varchar is variable-length, without space-padding for
// comparisons
m_varchar_charset = cs;
- m_skip_func = &Rdb_key_def::skip_variable_length;
- m_pack_func = &Rdb_key_def::pack_with_varchar_encoding;
+ m_skip_func = Rdb_key_def::skip_variable_length;
+ m_pack_func = Rdb_key_def::pack_with_varchar_encoding;
if (!key_descr || key_descr->use_legacy_varbinary_format()) {
m_max_image_len = RDB_LEGACY_ENCODED_SIZE(m_max_image_len);
} else {
@@ -3210,8 +3350,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// - For VARBINARY(N), values may have different lengths, so we're using
// variable-length encoding. This is also the only charset where the
// values are not space-padded for comparison.
- m_unpack_func = is_varchar ? &Rdb_key_def::unpack_binary_or_utf8_varchar
- : &Rdb_key_def::unpack_binary_str;
+ m_unpack_func = is_varchar ? Rdb_key_def::unpack_binary_or_utf8_varchar
+ : Rdb_key_def::unpack_binary_str;
res = true;
} else if (cs->number == COLLATION_LATIN1_BIN || cs->number == COLLATION_UTF8_BIN) {
// For _bin collations, mem-comparable form of the string is the string
@@ -3221,10 +3361,10 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// VARCHARs - are compared as if they were space-padded - but are
// not actually space-padded (reading the value back produces the
// original value, without the padding)
- m_unpack_func = &Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad;
- m_skip_func = &Rdb_key_def::skip_variable_space_pad;
- m_pack_func = &Rdb_key_def::pack_with_varchar_space_pad;
- m_make_unpack_info_func = &Rdb_key_def::dummy_make_unpack_info;
+ m_unpack_func = Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad;
+ m_skip_func = Rdb_key_def::skip_variable_space_pad;
+ m_pack_func = Rdb_key_def::pack_with_varchar_space_pad;
+ m_make_unpack_info_func = Rdb_key_def::dummy_make_unpack_info;
m_segment_size = get_segment_size_from_collation(cs);
m_max_image_len =
(max_image_len_before_chunks / (m_segment_size - 1) + 1) *
@@ -3234,15 +3374,15 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
} else {
// SQL layer pads CHAR(N) values to their maximum length.
// We just store that and restore it back.
- m_unpack_func = (cs->number == COLLATION_LATIN1_BIN) ?
- &Rdb_key_def::unpack_binary_str
- : &Rdb_key_def::unpack_utf8_str;
+ m_unpack_func = (cs->number == COLLATION_LATIN1_BIN)
+ ? Rdb_key_def::unpack_binary_str
+ : Rdb_key_def::unpack_utf8_str;
}
res = true;
} else {
// This is [VAR]CHAR(n) and the collation is not $(charset_name)_bin
- res = true; // index-only scans are possible
+ res = true; // index-only scans are possible
m_unpack_data_len = is_varchar ? 0 : field->field_length;
const uint idx = is_varchar ? 0 : 1;
const Rdb_collation_codec *codec = nullptr;
@@ -3258,8 +3398,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// Currently we handle these collations as NO_PAD, even if they have
// PAD_SPACE attribute.
if (cs->levels_for_order == 1) {
- m_pack_func = &Rdb_key_def::pack_with_varchar_space_pad;
- m_skip_func = &Rdb_key_def::skip_variable_space_pad;
+ m_pack_func = Rdb_key_def::pack_with_varchar_space_pad;
+ m_skip_func = Rdb_key_def::skip_variable_space_pad;
m_segment_size = get_segment_size_from_collation(cs);
m_max_image_len =
(max_image_len_before_chunks / (m_segment_size - 1) + 1) *
@@ -3268,14 +3408,16 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
&space_mb_len);
} else {
// NO_LINT_DEBUG
- sql_print_warning("RocksDB: you're trying to create an index "
- "with a multi-level collation %s",
- cs->name);
+ sql_print_warning(
+ "RocksDB: you're trying to create an index "
+ "with a multi-level collation %s",
+ cs->name);
// NO_LINT_DEBUG
- sql_print_warning("MyRocks will handle this collation internally "
- " as if it had a NO_PAD attribute.");
- m_pack_func = &Rdb_key_def::pack_with_varchar_encoding;
- m_skip_func = &Rdb_key_def::skip_variable_length;
+ sql_print_warning(
+ "MyRocks will handle this collation internally "
+ " as if it had a NO_PAD attribute.");
+ m_pack_func = Rdb_key_def::pack_with_varchar_encoding;
+ m_skip_func = Rdb_key_def::skip_variable_length;
}
}
@@ -3291,18 +3433,18 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// form. Our way of restoring the original value is to keep a copy of
// the original value in unpack_info.
m_unpack_info_stores_value = true;
- m_make_unpack_info_func =
- is_varchar ? &Rdb_key_def::make_unpack_unknown_varchar
- : &Rdb_key_def::make_unpack_unknown;
- m_unpack_func = is_varchar ? &Rdb_key_def::unpack_unknown_varchar
- : &Rdb_key_def::unpack_unknown;
+ m_make_unpack_info_func = is_varchar
+ ? Rdb_key_def::make_unpack_unknown_varchar
+ : Rdb_key_def::make_unpack_unknown;
+ m_unpack_func = is_varchar ? Rdb_key_def::unpack_unknown_varchar
+ : Rdb_key_def::unpack_unknown;
} else {
// Same as above: we don't know how to restore the value from its
// mem-comparable form.
// Here, we just indicate to the SQL layer we can't do it.
DBUG_ASSERT(m_unpack_func == nullptr);
m_unpack_info_stores_value = false;
- res = false; // Indicate that index-only reads are not possible
+ res = false; // Indicate that index-only reads are not possible
}
}
@@ -3344,7 +3486,7 @@ Field *Rdb_field_packing::get_field_in_table(const TABLE *const tbl) const {
}
void Rdb_field_packing::fill_hidden_pk_val(uchar **dst,
- const longlong &hidden_pk_id) const {
+ const longlong hidden_pk_id) const {
DBUG_ASSERT(m_max_image_len == 8);
String to;
@@ -3387,8 +3529,8 @@ Rdb_tbl_def::~Rdb_tbl_def() {
*/
bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
- rocksdb::WriteBatch *const batch, uchar *const key,
- const size_t &keylen) {
+ rocksdb::WriteBatch *const batch,
+ const rocksdb::Slice &key) {
StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
indexes.alloc(Rdb_key_def::VERSION_SIZE +
m_key_count * Rdb_key_def::PACKED_SIZE * 2);
@@ -3429,10 +3571,12 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
}
rdb_netstr_append_uint32(&indexes, cf_id);
- rdb_netstr_append_uint32(&indexes, kd.m_index_number);
+
+ uint32 index_number = kd.get_index_number();
+ rdb_netstr_append_uint32(&indexes, index_number);
struct Rdb_index_info index_info;
- index_info.m_gl_index_id = {cf_id, kd.m_index_number};
+ index_info.m_gl_index_id = {cf_id, index_number};
index_info.m_index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST;
index_info.m_index_type = kd.m_index_type;
index_info.m_kv_version = kd.m_kv_format_version;
@@ -3442,10 +3586,9 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
dict->add_or_update_index_cf_mapping(batch, &index_info);
}
- const rocksdb::Slice skey((char *)key, keylen);
const rocksdb::Slice svalue(indexes.c_ptr(), indexes.length());
- dict->put_key(batch, skey, svalue);
+ dict->put_key(batch, key, svalue);
return false;
}
@@ -3461,7 +3604,6 @@ bool Rdb_key_def::has_index_flag(uint32 index_flags, enum INDEX_FLAG flag) {
uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
enum INDEX_FLAG flag,
uint *const length) {
-
DBUG_ASSERT_IMP(flag != MAX_FLAG,
Rdb_key_def::has_index_flag(index_flags, flag));
@@ -3496,7 +3638,9 @@ void Rdb_key_def::write_index_flag_field(Rdb_string_writer *const buf,
void Rdb_tbl_def::check_if_is_mysql_system_table() {
static const char *const system_dbs[] = {
- "mysql", "performance_schema", "information_schema",
+ "mysql",
+ "performance_schema",
+ "information_schema",
};
m_is_mysql_system_table = false;
@@ -3508,6 +3652,15 @@ void Rdb_tbl_def::check_if_is_mysql_system_table() {
}
}
+void Rdb_tbl_def::check_and_set_read_free_rpl_table() {
+ m_is_read_free_rpl_table =
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+ rdb_read_free_regex_handler.matches(base_tablename());
+#else
+ false;
+#endif
+}
+
void Rdb_tbl_def::set_name(const std::string &name) {
int err MY_ATTRIBUTE((__unused__));
@@ -3533,30 +3686,6 @@ GL_INDEX_ID Rdb_tbl_def::get_autoincr_gl_index_id() {
return GL_INDEX_ID();
}
-/*
- Static function of type my_hash_get_key that gets invoked by
- the m_ddl_hash object of type my_core::HASH.
- It manufactures a key (db+table name in our case) from a record
- (Rdb_tbl_def in our case).
-*/
-const uchar *
-Rdb_ddl_manager::get_hash_key(Rdb_tbl_def *const rec, size_t *const length,
- my_bool not_used MY_ATTRIBUTE((__unused__))) {
- const std::string &dbname_tablename = rec->full_tablename();
- *length = dbname_tablename.size();
- return reinterpret_cast<const uchar *>(dbname_tablename.c_str());
-}
-
-/*
- Static function of type void (*my_hash_free_element_func_t)(void*) that gets
- invoked by the m_ddl_hash object of type my_core::HASH.
- It deletes a record (Rdb_tbl_def in our case).
-*/
-void Rdb_ddl_manager::free_hash_elem(void *const data) {
- Rdb_tbl_def *elem = reinterpret_cast<Rdb_tbl_def *>(data);
- delete elem;
-}
-
void Rdb_ddl_manager::erase_index_num(const GL_INDEX_ID &gl_index_id) {
m_index_num_to_keydef.erase(gl_index_id);
}
@@ -3579,7 +3708,7 @@ void Rdb_ddl_manager::remove_uncommitted_keydefs(
mysql_rwlock_unlock(&m_rwlock);
}
-namespace // anonymous namespace = not visible outside this source file
+namespace // anonymous namespace = not visible outside this source file
{
struct Rdb_validate_tbls : public Rdb_tables_scanner {
using tbl_info_t = std::pair<std::string, bool>;
@@ -3597,7 +3726,7 @@ struct Rdb_validate_tbls : public Rdb_tables_scanner {
bool check_frm_file(const std::string &fullpath, const std::string &dbname,
const std::string &tablename, bool *has_errors);
};
-} // anonymous namespace
+} // anonymous namespace
/*
Get a list of tables that we expect to have .frm files for. This will use the
@@ -3641,6 +3770,7 @@ bool Rdb_validate_tbls::check_frm_file(const std::string &fullpath,
//enum legacy_db_type eng_type;
frm_type_enum type = dd_frm_type(nullptr, fullfilename.c_ptr(), &eng_type_str);
if (type == FRMTYPE_ERROR) {
+ // NO_LINT_DEBUG
sql_print_warning("RocksDB: Failed to open/read .from file: %s",
fullfilename.ptr());
return false;
@@ -3655,10 +3785,12 @@ bool Rdb_validate_tbls::check_frm_file(const std::string &fullpath,
*/
tbl_info_t element(tablename, false);
if (m_list.count(dbname) == 0 || m_list[dbname].erase(element) == 0) {
- sql_print_warning("RocksDB: Schema mismatch - "
- "A .frm file exists for table %s.%s, "
- "but that table is not registered in RocksDB",
- dbname.c_str(), tablename.c_str());
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Schema mismatch - "
+ "A .frm file exists for table %s.%s, "
+ "but that table is not registered in RocksDB",
+ dbname.c_str(), tablename.c_str());
*has_errors = true;
}
} else if (!strncmp(eng_type_str.str, "partition", eng_type_str.length)) {
@@ -3686,6 +3818,7 @@ bool Rdb_validate_tbls::scan_for_frms(const std::string &datadir,
/* Access the directory */
if (dir_info == nullptr) {
+ // NO_LINT_DEBUG
sql_print_warning("RocksDB: Could not open database directory: %s",
fullpath.c_str());
return false;
@@ -3732,6 +3865,7 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir,
dir_info = my_dir(datadir.c_str(), MYF(MY_DONT_SORT | MY_WANT_STAT));
if (dir_info == nullptr) {
+ // NO_LINT_DEBUG
sql_print_warning("RocksDB: could not open datadir: %s", datadir.c_str());
return false;
}
@@ -3739,12 +3873,10 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir,
file_info = dir_info->dir_entry;
for (uint ii = 0; ii < dir_info->number_of_files; ii++, file_info++) {
/* Ignore files/dirs starting with '.' */
- if (file_info->name[0] == '.')
- continue;
+ if (file_info->name[0] == '.') continue;
/* Ignore all non-directory files */
- if (!MY_S_ISDIR(file_info->mystat->st_mode))
- continue;
+ if (!MY_S_ISDIR(file_info->mystat->st_mode)) continue;
/* Scan all the .frm files in the directory */
if (!scan_for_frms(datadir, file_info->name, has_errors)) {
@@ -3777,8 +3909,9 @@ bool Rdb_ddl_manager::validate_auto_incr() {
GL_INDEX_ID gl_index_id;
if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE &&
- memcmp(key.data(), auto_incr_entry, Rdb_key_def::INDEX_NUMBER_SIZE))
+ memcmp(key.data(), auto_incr_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) {
break;
+ }
if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3) {
return false;
@@ -3795,10 +3928,11 @@ bool Rdb_ddl_manager::validate_auto_incr() {
rdb_netbuf_read_gl_index(&ptr, &gl_index_id);
if (!m_dict->get_index_info(gl_index_id, nullptr)) {
// NO_LINT_DEBUG
- sql_print_warning("RocksDB: AUTOINC mismatch - "
- "Index number (%u, %u) found in AUTOINC "
- "but does not exist as a DDL entry",
- gl_index_id.cf_id, gl_index_id.index_id);
+ sql_print_warning(
+ "RocksDB: AUTOINC mismatch - "
+ "Index number (%u, %u) found in AUTOINC "
+ "but does not exist as a DDL entry",
+ gl_index_id.cf_id, gl_index_id.index_id);
return false;
}
@@ -3806,10 +3940,11 @@ bool Rdb_ddl_manager::validate_auto_incr() {
const int version = rdb_netbuf_read_uint16(&ptr);
if (version > Rdb_key_def::AUTO_INCREMENT_VERSION) {
// NO_LINT_DEBUG
- sql_print_warning("RocksDB: AUTOINC mismatch - "
- "Index number (%u, %u) found in AUTOINC "
- "is on unsupported version %d",
- gl_index_id.cf_id, gl_index_id.index_id, version);
+ sql_print_warning(
+ "RocksDB: AUTOINC mismatch - "
+ "Index number (%u, %u) found in AUTOINC "
+ "is on unsupported version %d",
+ gl_index_id.cf_id, gl_index_id.index_id, version);
return false;
}
}
@@ -3846,10 +3981,12 @@ bool Rdb_ddl_manager::validate_schemas(void) {
*/
for (const auto &db : table_list.m_list) {
for (const auto &table : db.second) {
- sql_print_warning("RocksDB: Schema mismatch - "
- "Table %s.%s is registered in RocksDB "
- "but does not have a .frm file",
- db.first.c_str(), table.first.c_str());
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Schema mismatch - "
+ "Table %s.%s is registered in RocksDB "
+ "but does not have a .frm file",
+ db.first.c_str(), table.first.c_str());
has_errors = true;
}
}
@@ -3859,14 +3996,9 @@ bool Rdb_ddl_manager::validate_schemas(void) {
bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_cf_manager *const cf_manager,
- const uint32_t &validate_tables) {
- const ulong TABLE_HASH_SIZE = 32;
+ const uint32_t validate_tables) {
m_dict = dict_arg;
mysql_rwlock_init(0, &m_rwlock);
- (void)my_hash_init(&m_ddl_hash,
- /*system_charset_info*/ &my_charset_bin, TABLE_HASH_SIZE,
- 0, 0, (my_hash_get_key)Rdb_ddl_manager::get_hash_key,
- Rdb_ddl_manager::free_hash_elem, 0);
/* Read the data dictionary and populate the hash */
uchar ddl_entry[Rdb_key_def::INDEX_NUMBER_SIZE];
@@ -3888,10 +4020,12 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
const rocksdb::Slice val = it->value();
if (key.size() >= Rdb_key_def::INDEX_NUMBER_SIZE &&
- memcmp(key.data(), ddl_entry, Rdb_key_def::INDEX_NUMBER_SIZE))
+ memcmp(key.data(), ddl_entry, Rdb_key_def::INDEX_NUMBER_SIZE)) {
break;
+ }
if (key.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) {
+ // NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: key has length %d (corruption?)",
(int)key.size());
return true;
@@ -3903,6 +4037,7 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
// Now, read the DDLs.
const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ // NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
@@ -3913,9 +4048,11 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
ptr = reinterpret_cast<const uchar *>(val.data());
const int version = rdb_netbuf_read_uint16(&ptr);
if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
- sql_print_error("RocksDB: DDL ENTRY Version was not expected."
- "Expected: %d, Actual: %d",
- Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: DDL ENTRY Version was not expected."
+ "Expected: %d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
return true;
}
ptr_end = ptr + real_val_size;
@@ -3925,32 +4062,40 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
uint flags = 0;
struct Rdb_index_info index_info;
if (!m_dict->get_index_info(gl_index_id, &index_info)) {
- sql_print_error("RocksDB: Could not get index information "
- "for Index Number (%u,%u), table %s",
- gl_index_id.cf_id, gl_index_id.index_id,
- tdef->full_tablename().c_str());
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Could not get index information "
+ "for Index Number (%u,%u), table %s",
+ gl_index_id.cf_id, gl_index_id.index_id,
+ tdef->full_tablename().c_str());
return true;
}
if (max_index_id_in_dict < gl_index_id.index_id) {
- sql_print_error("RocksDB: Found max index id %u from data dictionary "
- "but also found larger index id %u from dictionary. "
- "This should never happen and possibly a bug.",
- max_index_id_in_dict, gl_index_id.index_id);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Found max index id %u from data dictionary "
+ "but also found larger index id %u from dictionary. "
+ "This should never happen and possibly a bug.",
+ max_index_id_in_dict, gl_index_id.index_id);
return true;
}
if (!m_dict->get_cf_flags(gl_index_id.cf_id, &flags)) {
- sql_print_error("RocksDB: Could not get Column Family Flags "
- "for CF Number %d, table %s",
- gl_index_id.cf_id, tdef->full_tablename().c_str());
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Could not get Column Family Flags "
+ "for CF Number %d, table %s",
+ gl_index_id.cf_id, tdef->full_tablename().c_str());
return true;
}
if ((flags & Rdb_key_def::AUTO_CF_FLAG) != 0) {
// The per-index cf option is deprecated. Make sure we don't have the
// flag set in any existing database. NO_LINT_DEBUG
- sql_print_error("RocksDB: The defunct AUTO_CF_FLAG is enabled for CF "
- "number %d, table %s",
- gl_index_id.cf_id, tdef->full_tablename().c_str());
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: The defunct AUTO_CF_FLAG is enabled for CF "
+ "number %d, table %s",
+ gl_index_id.cf_id, tdef->full_tablename().c_str());
}
rocksdb::ColumnFamilyHandle *const cfh =
@@ -3988,11 +4133,13 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
if (validate_tables > 0) {
std::string msg;
if (!validate_schemas()) {
- msg = "RocksDB: Problems validating data dictionary "
- "against .frm files, exiting";
+ msg =
+ "RocksDB: Problems validating data dictionary "
+ "against .frm files, exiting";
} else if (!validate_auto_incr()) {
- msg = "RocksDB: Problems validating auto increment values in "
- "data dictionary, exiting";
+ msg =
+ "RocksDB: Problems validating auto increment values in "
+ "data dictionary, exiting";
}
if (validate_tables == 1 && !msg.empty()) {
// NO_LINT_DEBUG
@@ -4014,20 +4161,23 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
return true;
}
delete it;
+ // NO_LINT_DEBUG
sql_print_information("RocksDB: Table_store: loaded DDL data for %d tables",
i);
return false;
}
Rdb_tbl_def *Rdb_ddl_manager::find(const std::string &table_name,
- const bool &lock) {
+ const bool lock) {
if (lock) {
mysql_rwlock_rdlock(&m_rwlock);
}
- Rdb_tbl_def *const rec = reinterpret_cast<Rdb_tbl_def *>(my_hash_search(
- &m_ddl_hash, reinterpret_cast<const uchar *>(table_name.c_str()),
- table_name.size()));
+ Rdb_tbl_def *rec = nullptr;
+ const auto it = m_ddl_map.find(table_name);
+ if (it != m_ddl_map.end()) {
+ rec = it->second;
+ }
if (lock) {
mysql_rwlock_unlock(&m_rwlock);
@@ -4040,8 +4190,8 @@ Rdb_tbl_def *Rdb_ddl_manager::find(const std::string &table_name,
// lock on m_rwlock to make sure the Rdb_key_def is not discarded while we
// are finding it. Copying it into 'ret' increments the count making sure
// that the object will not be discarded until we are finished with it.
-std::shared_ptr<const Rdb_key_def>
-Rdb_ddl_manager::safe_find(GL_INDEX_ID gl_index_id) {
+std::shared_ptr<const Rdb_key_def> Rdb_ddl_manager::safe_find(
+ GL_INDEX_ID gl_index_id) {
std::shared_ptr<const Rdb_key_def> ret(nullptr);
mysql_rwlock_rdlock(&m_rwlock);
@@ -4071,8 +4221,8 @@ Rdb_ddl_manager::safe_find(GL_INDEX_ID gl_index_id) {
}
// this method assumes at least read-only lock on m_rwlock
-const std::shared_ptr<Rdb_key_def> &
-Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) {
+const std::shared_ptr<Rdb_key_def> &Rdb_ddl_manager::find(
+ GL_INDEX_ID gl_index_id) {
auto it = m_index_num_to_keydef.find(gl_index_id);
if (it != m_index_num_to_keydef.end()) {
auto table_def = find(it->second.first, false);
@@ -4095,8 +4245,8 @@ Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) {
// this method returns the name of the table based on an index id. It acquires
// a read lock on m_rwlock.
-const std::string
-Rdb_ddl_manager::safe_get_table_name(const GL_INDEX_ID &gl_index_id) {
+const std::string Rdb_ddl_manager::safe_get_table_name(
+ const GL_INDEX_ID &gl_index_id) {
std::string ret;
mysql_rwlock_rdlock(&m_rwlock);
auto it = m_index_num_to_keydef.find(gl_index_id);
@@ -4145,7 +4295,7 @@ void Rdb_ddl_manager::adjust_stats(
}
}
-void Rdb_ddl_manager::persist_stats(const bool &sync) {
+void Rdb_ddl_manager::persist_stats(const bool sync) {
mysql_rwlock_wrlock(&m_rwlock);
const auto local_stats2store = std::move(m_stats2store);
m_stats2store.clear();
@@ -4170,18 +4320,15 @@ void Rdb_ddl_manager::persist_stats(const bool &sync) {
int Rdb_ddl_manager::put_and_write(Rdb_tbl_def *const tbl,
rocksdb::WriteBatch *const batch) {
- uchar buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE];
- uint pos = 0;
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> buf_writer;
- rdb_netbuf_store_index(buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
- pos += Rdb_key_def::INDEX_NUMBER_SIZE;
+ buf_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
const std::string &dbname_tablename = tbl->full_tablename();
- memcpy(buf + pos, dbname_tablename.c_str(), dbname_tablename.size());
- pos += dbname_tablename.size();
+ buf_writer.write(dbname_tablename.c_str(), dbname_tablename.size());
int res;
- if ((res = tbl->put_dict(m_dict, batch, buf, pos))) {
+ if ((res = tbl->put_dict(m_dict, batch, buf_writer.to_slice()))) {
return res;
}
if ((res = put(tbl))) {
@@ -4192,62 +4339,58 @@ int Rdb_ddl_manager::put_and_write(Rdb_tbl_def *const tbl,
/* Return 0 - ok, other value - error */
/* TODO:
- This function modifies m_ddl_hash and m_index_num_to_keydef.
+ This function modifies m_ddl_map and m_index_num_to_keydef.
However, these changes need to be reversed if dict_manager.commit fails
See the discussion here: https://reviews.facebook.net/D35925#inline-259167
Tracked by https://github.com/facebook/mysql-5.6/issues/33
*/
-int Rdb_ddl_manager::put(Rdb_tbl_def *const tbl, const bool &lock) {
+int Rdb_ddl_manager::put(Rdb_tbl_def *const tbl, const bool lock) {
Rdb_tbl_def *rec;
- my_bool result;
const std::string &dbname_tablename = tbl->full_tablename();
- if (lock)
- mysql_rwlock_wrlock(&m_rwlock);
+ if (lock) mysql_rwlock_wrlock(&m_rwlock);
// We have to do this find because 'tbl' is not yet in the list. We need
// to find the one we are replacing ('rec')
rec = find(dbname_tablename, false);
if (rec) {
- // this will free the old record.
- my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar *>(rec));
+ // Free the old record.
+ delete rec;
+ m_ddl_map.erase(dbname_tablename);
}
- result = my_hash_insert(&m_ddl_hash, reinterpret_cast<uchar *>(tbl));
+ m_ddl_map.emplace(dbname_tablename, tbl);
for (uint keyno = 0; keyno < tbl->m_key_count; keyno++) {
m_index_num_to_keydef[tbl->m_key_descr_arr[keyno]->get_gl_index_id()] =
std::make_pair(dbname_tablename, keyno);
}
+ tbl->check_and_set_read_free_rpl_table();
- if (lock)
- mysql_rwlock_unlock(&m_rwlock);
- return result;
+ if (lock) mysql_rwlock_unlock(&m_rwlock);
+ return 0;
}
void Rdb_ddl_manager::remove(Rdb_tbl_def *const tbl,
rocksdb::WriteBatch *const batch,
- const bool &lock) {
- if (lock)
- mysql_rwlock_wrlock(&m_rwlock);
-
- uchar buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE];
- uint pos = 0;
-
- rdb_netbuf_store_index(buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
- pos += Rdb_key_def::INDEX_NUMBER_SIZE;
+ const bool lock) {
+ if (lock) mysql_rwlock_wrlock(&m_rwlock);
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> key_writer;
+ key_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
const std::string &dbname_tablename = tbl->full_tablename();
- memcpy(buf + pos, dbname_tablename.c_str(), dbname_tablename.size());
- pos += dbname_tablename.size();
+ key_writer.write(dbname_tablename.c_str(), dbname_tablename.size());
- const rocksdb::Slice tkey((char *)buf, pos);
- m_dict->delete_key(batch, tkey);
+ m_dict->delete_key(batch, key_writer.to_slice());
- /* The following will also delete the object: */
- my_hash_delete(&m_ddl_hash, reinterpret_cast<uchar *>(tbl));
+ const auto it = m_ddl_map.find(dbname_tablename);
+ if (it != m_ddl_map.end()) {
+ // Free Rdb_tbl_def
+ delete it->second;
- if (lock)
- mysql_rwlock_unlock(&m_rwlock);
+ m_ddl_map.erase(it);
+ }
+
+ if (lock) mysql_rwlock_unlock(&m_rwlock);
}
bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
@@ -4255,8 +4398,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
Rdb_tbl_def *rec;
Rdb_tbl_def *new_rec;
bool res = true;
- uchar new_buf[FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE];
- uint new_pos = 0;
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> new_buf_writer;
mysql_rwlock_wrlock(&m_rwlock);
if (!(rec = find(from, false))) {
@@ -4278,18 +4420,16 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_key_descr_arr = nullptr;
// Create a new key
- rdb_netbuf_store_index(new_buf, Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
- new_pos += Rdb_key_def::INDEX_NUMBER_SIZE;
+ new_buf_writer.write_index(Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER);
const std::string &dbname_tablename = new_rec->full_tablename();
- memcpy(new_buf + new_pos, dbname_tablename.c_str(), dbname_tablename.size());
- new_pos += dbname_tablename.size();
+ new_buf_writer.write(dbname_tablename.c_str(), dbname_tablename.size());
// Create a key to add
- if (!new_rec->put_dict(m_dict, batch, new_buf, new_pos)) {
+ if (!new_rec->put_dict(m_dict, batch, new_buf_writer.to_slice())) {
remove(rec, batch, false);
put(new_rec, false);
- res = false; // ok
+ res = false; // ok
}
mysql_rwlock_unlock(&m_rwlock);
@@ -4297,13 +4437,17 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
}
void Rdb_ddl_manager::cleanup() {
- my_hash_free(&m_ddl_hash);
+ for (const auto &kv : m_ddl_map) {
+ delete kv.second;
+ }
+ m_ddl_map.clear();
+
mysql_rwlock_destroy(&m_rwlock);
m_sequence.cleanup();
}
int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner *const tables_scanner) {
- int i, ret;
+ int ret;
Rdb_tbl_def *rec;
DBUG_ASSERT(tables_scanner != nullptr);
@@ -4311,14 +4455,11 @@ int Rdb_ddl_manager::scan_for_tables(Rdb_tables_scanner *const tables_scanner) {
mysql_rwlock_rdlock(&m_rwlock);
ret = 0;
- i = 0;
- while ((
- rec = reinterpret_cast<Rdb_tbl_def *>(my_hash_element(&m_ddl_hash, i)))) {
+ for (const auto &kv : m_ddl_map) {
+ rec = kv.second;
ret = tables_scanner->add_table(rec);
- if (ret)
- break;
- i++;
+ if (ret) break;
}
mysql_rwlock_unlock(&m_rwlock);
@@ -4333,9 +4474,9 @@ bool Rdb_binlog_manager::init(Rdb_dict_manager *const dict_arg) {
DBUG_ASSERT(dict_arg != nullptr);
m_dict = dict_arg;
- rdb_netbuf_store_index(m_key_buf, Rdb_key_def::BINLOG_INFO_INDEX_NUMBER);
- m_key_slice = rocksdb::Slice(reinterpret_cast<char *>(m_key_buf),
- Rdb_key_def::INDEX_NUMBER_SIZE);
+ m_key_writer.reset();
+ m_key_writer.write_index(Rdb_key_def::BINLOG_INFO_INDEX_NUMBER);
+ m_key_slice = m_key_writer.to_slice();
return false;
}
@@ -4357,10 +4498,36 @@ void Rdb_binlog_manager::update(const char *const binlog_name,
if (binlog_name && binlog_pos) {
// max binlog length (512) + binlog pos (4) + binlog gtid (57) < 1024
const size_t RDB_MAX_BINLOG_INFO_LEN = 1024;
- uchar value_buf[RDB_MAX_BINLOG_INFO_LEN];
- m_dict->put_key(
- batch, m_key_slice,
- pack_value(value_buf, binlog_name, binlog_pos, NULL));
+ Rdb_buf_writer<RDB_MAX_BINLOG_INFO_LEN> value_writer;
+
+ // store version
+ value_writer.write_uint16(Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION);
+
+ // store binlog file name length
+ DBUG_ASSERT(strlen(binlog_name) <= FN_REFLEN);
+ const uint16_t binlog_name_len = strlen(binlog_name);
+ value_writer.write_uint16(binlog_name_len);
+
+ // store binlog file name
+ value_writer.write(binlog_name, binlog_name_len);
+
+ // store binlog pos
+ value_writer.write_uint32(binlog_pos);
+
+#ifdef MARIADB_MERGE_2019
+ // store binlog gtid length.
+ // If gtid was not set, store 0 instead
+ const uint16_t binlog_max_gtid_len =
+ binlog_max_gtid ? strlen(binlog_max_gtid) : 0;
+ value_writer.write_uint16(binlog_max_gtid_len);
+
+ if (binlog_max_gtid_len > 0) {
+ // store binlog gtid
+ value_writer.write(binlog_max_gtid, binlog_max_gtid_len);
+ }
+#endif
+
+ m_dict->put_key(batch, m_key_slice, value_writer.to_slice());
}
}
@@ -4382,67 +4549,15 @@ bool Rdb_binlog_manager::read(char *const binlog_name,
rocksdb::Status status = m_dict->get_value(m_key_slice, &value);
if (status.ok()) {
if (!unpack_value((const uchar *)value.c_str(), value.size(), binlog_name, binlog_pos,
- binlog_gtid))
+ binlog_gtid)) {
ret = true;
+ }
}
}
return ret;
}
/**
- Pack binlog_name, binlog_pos, binlog_gtid into preallocated
- buffer, then converting and returning a RocksDB Slice
- @param buf Preallocated buffer to set binlog info.
- @param binlog_name Binlog name
- @param binlog_pos Binlog pos
- @return rocksdb::Slice converted from buf and its length
-*/
-rocksdb::Slice
-Rdb_binlog_manager::pack_value(uchar *const buf, const char *const binlog_name,
- const my_off_t &binlog_pos,
- const char *const binlog_gtid) const {
- uint pack_len = 0;
-
- // store version
- rdb_netbuf_store_uint16(buf, Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION);
- pack_len += Rdb_key_def::VERSION_SIZE;
-
- // store binlog file name length
- DBUG_ASSERT(strlen(binlog_name) <= FN_REFLEN);
- const uint16_t binlog_name_len = (uint16_t)strlen(binlog_name);
- rdb_netbuf_store_uint16(buf + pack_len, binlog_name_len);
- pack_len += sizeof(uint16);
-
- // store binlog file name
- memcpy(buf + pack_len, binlog_name, binlog_name_len);
- pack_len += binlog_name_len;
-
- // store binlog pos
- rdb_netbuf_store_uint32(buf + pack_len, binlog_pos);
- pack_len += sizeof(uint32);
-
- // store binlog gtid length.
- // If gtid was not set, store 0 instead
-#ifdef MARIAROCKS_NOT_YET
- const uint16_t binlog_gtid_len = binlog_gtid ? (uint16_t)strlen(binlog_gtid) : 0;
- rdb_netbuf_store_uint16(buf + pack_len, binlog_gtid_len);
-#endif
- pack_len += sizeof(uint16);
- // MariaDB:
- rdb_netbuf_store_uint16(buf + pack_len, 0);
-
-#ifdef MARIAROCKS_NOT_YET
- if (binlog_gtid_len > 0) {
- // store binlog gtid
- memcpy(buf + pack_len, binlog_gtid, binlog_gtid_len);
- pack_len += binlog_gtid_len;
- }
-#endif
-
- return rocksdb::Slice((char *)buf, pack_len);
-}
-
-/**
Unpack value then split into binlog_name, binlog_pos (and binlog_gtid)
@param[IN] value Binlog state info fetched from RocksDB
@param[OUT] binlog_name Binlog name
@@ -4466,8 +4581,7 @@ bool Rdb_binlog_manager::unpack_value(const uchar *const value,
const uint16_t version = rdb_netbuf_to_uint16(value);
pack_len += Rdb_key_def::VERSION_SIZE;
- if (version != Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION)
- return true;
+ if (version != Rdb_key_def::BINLOG_INFO_INDEX_NUMBER_VERSION) return true;
if ((value_size -= sizeof(uint16)) < 0)
return true;
@@ -4525,7 +4639,7 @@ bool Rdb_binlog_manager::unpack_value(const uchar *const value,
@param[IN] write_batch Handle to storage engine writer.
*/
void Rdb_binlog_manager::update_slave_gtid_info(
- const uint &id, const char *const db, const char *const gtid,
+ const uint id, const char *const db, const char *const gtid,
rocksdb::WriteBatchBase *const write_batch) {
if (id && db && gtid) {
// Make sure that if the slave_gtid_info table exists we have a
@@ -4545,41 +4659,30 @@ void Rdb_binlog_manager::update_slave_gtid_info(
String value;
// Build key
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE + 4] = {0};
- uchar *buf = key_buf;
- rdb_netbuf_store_index(buf, kd->get_index_number());
- buf += Rdb_key_def::INDEX_NUMBER_SIZE;
- rdb_netbuf_store_uint32(buf, id);
- buf += 4;
- const rocksdb::Slice key_slice =
- rocksdb::Slice((const char *)key_buf, buf - key_buf);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE + 4> key_writer;
+ key_writer.write_index(kd->get_index_number());
+ key_writer.write_uint32(id);
// Build value
- uchar value_buf[128] = {0};
+ Rdb_buf_writer<128> value_writer;
DBUG_ASSERT(gtid);
const uint db_len = strlen(db);
const uint gtid_len = strlen(gtid);
- buf = value_buf;
// 1 byte used for flags. Empty here.
- buf++;
+ value_writer.write_byte(0);
// Write column 1.
DBUG_ASSERT(strlen(db) <= 64);
- rdb_netbuf_store_byte(buf, db_len);
- buf++;
- memcpy(buf, db, db_len);
- buf += db_len;
+ value_writer.write_byte(db_len);
+ value_writer.write(db, db_len);
// Write column 2.
DBUG_ASSERT(gtid_len <= 56);
- rdb_netbuf_store_byte(buf, gtid_len);
- buf++;
- memcpy(buf, gtid, gtid_len);
- buf += gtid_len;
- const rocksdb::Slice value_slice =
- rocksdb::Slice((const char *)value_buf, buf - value_buf);
+ value_writer.write_byte(gtid_len);
+ value_writer.write(gtid, gtid_len);
- write_batch->Put(kd->get_cf(), key_slice, value_slice);
+ write_batch->Put(kd->get_cf(), key_writer.to_slice(),
+ value_writer.to_slice());
}
}
@@ -4651,16 +4754,15 @@ rocksdb::Iterator *Rdb_dict_manager::new_iterator() const {
}
int Rdb_dict_manager::commit(rocksdb::WriteBatch *const batch,
- const bool &sync) const {
- if (!batch)
- return HA_ERR_ROCKSDB_COMMIT_FAILED;
+ const bool sync) const {
+ if (!batch) return HA_ERR_ROCKSDB_COMMIT_FAILED;
int res = HA_EXIT_SUCCESS;
rocksdb::WriteOptions options;
options.sync = sync;
rocksdb::TransactionDBWriteOptimizations optimize;
optimize.skip_concurrency_control = true;
rocksdb::Status s = m_db->Write(options, optimize, batch);
- res = !s.ok(); // we return true when something failed
+ res = !s.ok(); // we return true when something failed
if (res) {
rdb_handle_io_error(s, RDB_IO_ERROR_DICT_COMMIT);
}
@@ -4681,54 +4783,44 @@ void Rdb_dict_manager::dump_index_id(uchar *const netbuf,
void Rdb_dict_manager::delete_with_prefix(
rocksdb::WriteBatch *const batch, Rdb_key_def::DATA_DICT_TYPE dict_type,
const GL_INDEX_ID &gl_index_id) const {
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- dump_index_id(key_buf, dict_type, gl_index_id);
- rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, dict_type, gl_index_id);
- delete_key(batch, key);
+ delete_key(batch, key_writer.to_slice());
}
void Rdb_dict_manager::add_or_update_index_cf_mapping(
rocksdb::WriteBatch *batch, struct Rdb_index_info *const index_info) const {
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- uchar value_buf[256] = {0};
- dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, index_info->m_gl_index_id);
- const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
-
- uchar *ptr = value_buf;
- rdb_netbuf_store_uint16(ptr, Rdb_key_def::INDEX_INFO_VERSION_LATEST);
- ptr += RDB_SIZEOF_INDEX_INFO_VERSION;
- rdb_netbuf_store_byte(ptr, index_info->m_index_type);
- ptr += RDB_SIZEOF_INDEX_TYPE;
- rdb_netbuf_store_uint16(ptr, index_info->m_kv_version);
- ptr += RDB_SIZEOF_KV_VERSION;
- rdb_netbuf_store_uint32(ptr, index_info->m_index_flags);
- ptr += RDB_SIZEOF_INDEX_FLAGS;
- rdb_netbuf_store_uint64(ptr, index_info->m_ttl_duration);
- ptr += ROCKSDB_SIZEOF_TTL_RECORD;
-
- const rocksdb::Slice value =
- rocksdb::Slice((char *)value_buf, ptr - value_buf);
- batch->Put(m_system_cfh, key, value);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_INFO,
+ index_info->m_gl_index_id);
+
+ Rdb_buf_writer<256> value_writer;
+
+ value_writer.write_uint16(Rdb_key_def::INDEX_INFO_VERSION_LATEST);
+ value_writer.write_byte(index_info->m_index_type);
+ value_writer.write_uint16(index_info->m_kv_version);
+ value_writer.write_uint32(index_info->m_index_flags);
+ value_writer.write_uint64(index_info->m_ttl_duration);
+
+ batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice());
}
void Rdb_dict_manager::add_cf_flags(rocksdb::WriteBatch *const batch,
- const uint32_t &cf_id,
- const uint32_t &cf_flags) const {
+ const uint32_t cf_id,
+ const uint32_t cf_flags) const {
DBUG_ASSERT(batch != nullptr);
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0};
- uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE] =
- {0};
- rdb_netbuf_store_uint32(key_buf, Rdb_key_def::CF_DEFINITION);
- rdb_netbuf_store_uint32(key_buf + Rdb_key_def::INDEX_NUMBER_SIZE, cf_id);
- const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
-
- rdb_netbuf_store_uint16(value_buf, Rdb_key_def::CF_DEFINITION_VERSION);
- rdb_netbuf_store_uint32(value_buf + Rdb_key_def::VERSION_SIZE, cf_flags);
- const rocksdb::Slice value =
- rocksdb::Slice((char *)value_buf, sizeof(value_buf));
- batch->Put(m_system_cfh, key, value);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 2> key_writer;
+ key_writer.write_uint32(Rdb_key_def::CF_DEFINITION);
+ key_writer.write_uint32(cf_id);
+
+ Rdb_buf_writer<Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE>
+ value_writer;
+ value_writer.write_uint16(Rdb_key_def::CF_DEFINITION_VERSION);
+ value_writer.write_uint32(cf_flags);
+
+ batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice());
}
void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch,
@@ -4741,7 +4833,6 @@ void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch,
bool Rdb_dict_manager::get_index_info(
const GL_INDEX_ID &gl_index_id,
struct Rdb_index_info *const index_info) const {
-
if (index_info) {
index_info->m_gl_index_id = gl_index_id;
}
@@ -4749,11 +4840,10 @@ bool Rdb_dict_manager::get_index_info(
bool found = false;
bool error = false;
std::string value;
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, gl_index_id);
- const rocksdb::Slice &key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_INFO, gl_index_id);
- const rocksdb::Status &status = get_value(key, &value);
+ const rocksdb::Status &status = get_value(key_writer.to_slice(), &value);
if (status.ok()) {
if (!index_info) {
return true;
@@ -4765,73 +4855,73 @@ bool Rdb_dict_manager::get_index_info(
ptr += RDB_SIZEOF_INDEX_INFO_VERSION;
switch (index_info->m_index_dict_version) {
- case Rdb_key_def::INDEX_INFO_VERSION_FIELD_FLAGS:
- /* Sanity check to prevent reading bogus TTL record. */
- if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
- RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
- RDB_SIZEOF_INDEX_FLAGS +
- ROCKSDB_SIZEOF_TTL_RECORD) {
- error = true;
+ case Rdb_key_def::INDEX_INFO_VERSION_FIELD_FLAGS:
+ /* Sanity check to prevent reading bogus TTL record. */
+ if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
+ RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
+ RDB_SIZEOF_INDEX_FLAGS +
+ ROCKSDB_SIZEOF_TTL_RECORD) {
+ error = true;
+ break;
+ }
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ ptr += RDB_SIZEOF_KV_VERSION;
+ index_info->m_index_flags = rdb_netbuf_to_uint32(ptr);
+ ptr += RDB_SIZEOF_INDEX_FLAGS;
+ index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ found = true;
break;
- }
- index_info->m_index_type = rdb_netbuf_to_byte(ptr);
- ptr += RDB_SIZEOF_INDEX_TYPE;
- index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
- ptr += RDB_SIZEOF_KV_VERSION;
- index_info->m_index_flags = rdb_netbuf_to_uint32(ptr);
- ptr += RDB_SIZEOF_INDEX_FLAGS;
- index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
- found = true;
- break;
- case Rdb_key_def::INDEX_INFO_VERSION_TTL:
- /* Sanity check to prevent reading bogus into TTL record. */
- if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
- RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
- ROCKSDB_SIZEOF_TTL_RECORD) {
- error = true;
+ case Rdb_key_def::INDEX_INFO_VERSION_TTL:
+ /* Sanity check to prevent reading bogus into TTL record. */
+ if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
+ RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
+ ROCKSDB_SIZEOF_TTL_RECORD) {
+ error = true;
+ break;
+ }
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ ptr += RDB_SIZEOF_KV_VERSION;
+ index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ if ((index_info->m_kv_version ==
+ Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) &&
+ index_info->m_ttl_duration > 0) {
+ index_info->m_index_flags = Rdb_key_def::TTL_FLAG;
+ }
+ found = true;
break;
- }
- index_info->m_index_type = rdb_netbuf_to_byte(ptr);
- ptr += RDB_SIZEOF_INDEX_TYPE;
- index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
- ptr += RDB_SIZEOF_KV_VERSION;
- index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
- if ((index_info->m_kv_version ==
- Rdb_key_def::PRIMARY_FORMAT_VERSION_TTL) &&
- index_info->m_ttl_duration > 0) {
- index_info->m_index_flags = Rdb_key_def::TTL_FLAG;
- }
- found = true;
- break;
- case Rdb_key_def::INDEX_INFO_VERSION_VERIFY_KV_FORMAT:
- case Rdb_key_def::INDEX_INFO_VERSION_GLOBAL_ID:
- index_info->m_index_type = rdb_netbuf_to_byte(ptr);
- ptr += RDB_SIZEOF_INDEX_TYPE;
- index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
- found = true;
- break;
+ case Rdb_key_def::INDEX_INFO_VERSION_VERIFY_KV_FORMAT:
+ case Rdb_key_def::INDEX_INFO_VERSION_GLOBAL_ID:
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ found = true;
+ break;
- default:
- error = true;
- break;
+ default:
+ error = true;
+ break;
}
switch (index_info->m_index_type) {
- case Rdb_key_def::INDEX_TYPE_PRIMARY:
- case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: {
- error =
- index_info->m_kv_version > Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST;
- break;
- }
- case Rdb_key_def::INDEX_TYPE_SECONDARY:
- error = index_info->m_kv_version >
- Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST;
- break;
- default:
- error = true;
- break;
+ case Rdb_key_def::INDEX_TYPE_PRIMARY:
+ case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: {
+ error = index_info->m_kv_version >
+ Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST;
+ break;
+ }
+ case Rdb_key_def::INDEX_TYPE_SECONDARY:
+ error = index_info->m_kv_version >
+ Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST;
+ break;
+ default:
+ error = true;
+ break;
}
}
@@ -4849,20 +4939,18 @@ bool Rdb_dict_manager::get_index_info(
return found;
}
-bool Rdb_dict_manager::get_cf_flags(const uint32_t &cf_id,
+bool Rdb_dict_manager::get_cf_flags(const uint32_t cf_id,
uint32_t *const cf_flags) const {
DBUG_ASSERT(cf_flags != nullptr);
bool found = false;
std::string value;
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0};
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 2> key_writer;
- rdb_netbuf_store_uint32(key_buf, Rdb_key_def::CF_DEFINITION);
- rdb_netbuf_store_uint32(key_buf + Rdb_key_def::INDEX_NUMBER_SIZE, cf_id);
+ key_writer.write_uint32(Rdb_key_def::CF_DEFINITION);
+ key_writer.write_uint32(cf_id);
- const rocksdb::Slice key =
- rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf));
- const rocksdb::Status status = get_value(key, &value);
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
if (status.ok()) {
const uchar *val = (const uchar *)value.c_str();
@@ -4890,10 +4978,9 @@ void Rdb_dict_manager::get_ongoing_index_operation(
DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ||
dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
- uchar index_buf[Rdb_key_def::INDEX_NUMBER_SIZE];
- rdb_netbuf_store_uint32(index_buf, dd_type);
- const rocksdb::Slice index_slice(reinterpret_cast<char *>(index_buf),
- Rdb_key_def::INDEX_NUMBER_SIZE);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE> index_writer;
+ index_writer.write_uint32(dd_type);
+ const rocksdb::Slice index_slice = index_writer.to_slice();
rocksdb::Iterator *it = new_iterator();
for (it->Seek(index_slice); it->Valid(); it->Next()) {
@@ -4937,11 +5024,10 @@ bool Rdb_dict_manager::is_index_operation_ongoing(
bool found = false;
std::string value;
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- dump_index_id(key_buf, dd_type, gl_index_id);
- const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, dd_type, gl_index_id);
- const rocksdb::Status status = get_value(key, &value);
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
if (status.ok()) {
found = true;
}
@@ -4958,23 +5044,19 @@ void Rdb_dict_manager::start_ongoing_index_operation(
DBUG_ASSERT(dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING ||
dd_type == Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- uchar value_buf[Rdb_key_def::VERSION_SIZE] = {0};
- dump_index_id(key_buf, dd_type, gl_index_id);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ Rdb_buf_writer<Rdb_key_def::VERSION_SIZE> value_writer;
+
+ dump_index_id(&key_writer, dd_type, gl_index_id);
// version as needed
if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) {
- rdb_netbuf_store_uint16(value_buf,
- Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION);
+ value_writer.write_uint16(Rdb_key_def::DDL_DROP_INDEX_ONGOING_VERSION);
} else {
- rdb_netbuf_store_uint16(value_buf,
- Rdb_key_def::DDL_CREATE_INDEX_ONGOING_VERSION);
+ value_writer.write_uint16(Rdb_key_def::DDL_CREATE_INDEX_ONGOING_VERSION);
}
- const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
- const rocksdb::Slice value =
- rocksdb::Slice((char *)value_buf, sizeof(value_buf));
- batch->Put(m_system_cfh, key, value);
+ batch->Put(m_system_cfh, key_writer.to_slice(), value_writer.to_slice());
}
/*
@@ -5006,7 +5088,7 @@ bool Rdb_dict_manager::is_drop_index_empty() const {
all associated indexes to be removed
*/
void Rdb_dict_manager::add_drop_table(
- std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys,
+ std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 n_keys,
rocksdb::WriteBatch *const batch) const {
std::unordered_set<GL_INDEX_ID> dropped_index_ids;
for (uint32 i = 0; i < n_keys; i++) {
@@ -5100,12 +5182,13 @@ void Rdb_dict_manager::resume_drop_indexes() const {
for (const auto &gl_index_id : gl_index_ids) {
log_start_drop_index(gl_index_id, "Resume");
if (max_index_id_in_dict < gl_index_id.index_id) {
- sql_print_error("RocksDB: Found max index id %u from data dictionary "
- "but also found dropped index id (%u,%u) from drop_index "
- "dictionary. This should never happen and is possibly a "
- "bug.",
- max_index_id_in_dict, gl_index_id.cf_id,
- gl_index_id.index_id);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Found max index id %u from data dictionary "
+ "but also found dropped index id (%u,%u) from drop_index "
+ "dictionary. This should never happen and is possibly a "
+ "bug.",
+ max_index_id_in_dict, gl_index_id.cf_id, gl_index_id.index_id);
abort();
}
}
@@ -5130,7 +5213,7 @@ void Rdb_dict_manager::rollback_ongoing_index_creation() const {
}
void Rdb_dict_manager::log_start_drop_table(
- const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys,
+ const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 n_keys,
const char *const log_action) const {
for (uint32 i = 0; i < n_keys; i++) {
log_start_drop_index(key_descr[i]->get_gl_index_id(), log_action);
@@ -5151,10 +5234,12 @@ void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id,
if (!incomplete_create_indexes.count(gl_index_id)) {
/* If it's not a partially created index, something is very wrong. */
- sql_print_error("RocksDB: Failed to get column family info "
- "from index id (%u,%u). MyRocks data dictionary may "
- "get corrupted.",
- gl_index_id.cf_id, gl_index_id.index_id);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Failed to get column family info "
+ "from index id (%u,%u). MyRocks data dictionary may "
+ "get corrupted.",
+ gl_index_id.cf_id, gl_index_id.index_id);
abort();
}
}
@@ -5167,7 +5252,7 @@ bool Rdb_dict_manager::get_max_index_id(uint32_t *const index_id) const {
const rocksdb::Status status = get_value(m_key_slice_max_index_id, &value);
if (status.ok()) {
const uchar *const val = (const uchar *)value.c_str();
- const uint16_t &version = rdb_netbuf_to_uint16(val);
+ const uint16_t version = rdb_netbuf_to_uint16(val);
if (version == Rdb_key_def::MAX_INDEX_ID_VERSION) {
*index_id = rdb_netbuf_to_uint32(val + Rdb_key_def::VERSION_SIZE);
found = true;
@@ -5177,27 +5262,28 @@ bool Rdb_dict_manager::get_max_index_id(uint32_t *const index_id) const {
}
bool Rdb_dict_manager::update_max_index_id(rocksdb::WriteBatch *const batch,
- const uint32_t &index_id) const {
+ const uint32_t index_id) const {
DBUG_ASSERT(batch != nullptr);
uint32_t old_index_id = -1;
if (get_max_index_id(&old_index_id)) {
if (old_index_id > index_id) {
- sql_print_error("RocksDB: Found max index id %u from data dictionary "
- "but trying to update to older value %u. This should "
- "never happen and possibly a bug.",
- old_index_id, index_id);
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Found max index id %u from data dictionary "
+ "but trying to update to older value %u. This should "
+ "never happen and possibly a bug.",
+ old_index_id, index_id);
return true;
}
}
- uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE] =
- {0};
- rdb_netbuf_store_uint16(value_buf, Rdb_key_def::MAX_INDEX_ID_VERSION);
- rdb_netbuf_store_uint32(value_buf + Rdb_key_def::VERSION_SIZE, index_id);
- const rocksdb::Slice value =
- rocksdb::Slice((char *)value_buf, sizeof(value_buf));
- batch->Put(m_system_cfh, m_key_slice_max_index_id, value);
+ Rdb_buf_writer<Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE>
+ value_writer;
+ value_writer.write_uint16(Rdb_key_def::MAX_INDEX_ID_VERSION);
+ value_writer.write_uint32(index_id);
+
+ batch->Put(m_system_cfh, m_key_slice_max_index_id, value_writer.to_slice());
return false;
}
@@ -5207,27 +5293,24 @@ void Rdb_dict_manager::add_stats(
DBUG_ASSERT(batch != nullptr);
for (const auto &it : stats) {
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- dump_index_id(key_buf, Rdb_key_def::INDEX_STATISTICS, it.m_gl_index_id);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_STATISTICS, it.m_gl_index_id);
// IndexStats::materialize takes complete care of serialization including
// storing the version
const auto value =
Rdb_index_stats::materialize(std::vector<Rdb_index_stats>{it});
- batch->Put(m_system_cfh, rocksdb::Slice((char *)key_buf, sizeof(key_buf)),
- value);
+ batch->Put(m_system_cfh, key_writer.to_slice(), value);
}
}
Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const {
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- dump_index_id(key_buf, Rdb_key_def::INDEX_STATISTICS, gl_index_id);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::INDEX_STATISTICS, gl_index_id);
std::string value;
- const rocksdb::Status status = get_value(
- rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)),
- &value);
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
if (status.ok()) {
std::vector<Rdb_index_stats> v;
// unmaterialize checks if the version matches
@@ -5239,41 +5322,34 @@ Rdb_index_stats Rdb_dict_manager::get_stats(GL_INDEX_ID gl_index_id) const {
return Rdb_index_stats();
}
-rocksdb::Status
-Rdb_dict_manager::put_auto_incr_val(rocksdb::WriteBatchBase *batch,
- const GL_INDEX_ID &gl_index_id,
- ulonglong val, bool overwrite) const {
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id);
- const rocksdb::Slice key =
- rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf));
+rocksdb::Status Rdb_dict_manager::put_auto_incr_val(
+ rocksdb::WriteBatchBase *batch, const GL_INDEX_ID &gl_index_id,
+ ulonglong val, bool overwrite) const {
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::AUTO_INC, gl_index_id);
// Value is constructed by storing the version and the value.
- uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION +
- ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0};
- uchar *ptr = value_buf;
- rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION);
- ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION;
- rdb_netbuf_store_uint64(ptr, val);
- ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE;
- const rocksdb::Slice value =
- rocksdb::Slice(reinterpret_cast<char *>(value_buf), ptr - value_buf);
+ Rdb_buf_writer<RDB_SIZEOF_AUTO_INCREMENT_VERSION +
+ ROCKSDB_SIZEOF_AUTOINC_VALUE>
+ value_writer;
+ value_writer.write_uint16(Rdb_key_def::AUTO_INCREMENT_VERSION);
+ value_writer.write_uint64(val);
if (overwrite) {
- return batch->Put(m_system_cfh, key, value);
+ return batch->Put(m_system_cfh, key_writer.to_slice(),
+ value_writer.to_slice());
}
- return batch->Merge(m_system_cfh, key, value);
+ return batch->Merge(m_system_cfh, key_writer.to_slice(),
+ value_writer.to_slice());
}
bool Rdb_dict_manager::get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
ulonglong *new_val) const {
- uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
- dump_index_id(key_buf, Rdb_key_def::AUTO_INC, gl_index_id);
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE * 3> key_writer;
+ dump_index_id(&key_writer, Rdb_key_def::AUTO_INC, gl_index_id);
std::string value;
- const rocksdb::Status status = get_value(
- rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf)),
- &value);
+ const rocksdb::Status status = get_value(key_writer.to_slice(), &value);
if (status.ok()) {
const uchar *const val = reinterpret_cast<const uchar *>(value.data());
@@ -5307,4 +5383,4 @@ uint Rdb_seq_generator::get_and_update_next_number(
return res;
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index d24ffeb0691..c349c527836 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -22,6 +22,7 @@
#include <map>
#include <mutex>
#include <string>
+#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
@@ -48,6 +49,27 @@ class Rdb_ddl_manager;
const uint32_t GTID_BUF_LEN = 60;
+class Rdb_convert_to_record_key_decoder {
+ public:
+ Rdb_convert_to_record_key_decoder() = default;
+ Rdb_convert_to_record_key_decoder(
+ const Rdb_convert_to_record_key_decoder &decoder) = delete;
+ Rdb_convert_to_record_key_decoder &operator=(
+ const Rdb_convert_to_record_key_decoder &decoder) = delete;
+ static int decode(uchar *const buf, uint *offset, Rdb_field_packing *fpi,
+ TABLE *table, Field *field, bool has_unpack_info,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unpack_reader);
+ static int skip(const Rdb_field_packing *fpi, const Field *field,
+ Rdb_string_reader *reader, Rdb_string_reader *unpack_reader);
+
+ private:
+ static int decode_field(Rdb_field_packing *fpi, Field *field,
+ Rdb_string_reader *reader,
+ const uchar *const default_value,
+ Rdb_string_reader *unpack_reader);
+};
+
/*
@brief
Field packing context.
@@ -63,7 +85,7 @@ const uint32_t GTID_BUF_LEN = 60;
unpack_info is passed as context data between the two.
*/
class Rdb_pack_field_context {
-public:
+ public:
Rdb_pack_field_context(const Rdb_pack_field_context &) = delete;
Rdb_pack_field_context &operator=(const Rdb_pack_field_context &) = delete;
@@ -74,6 +96,45 @@ public:
Rdb_string_writer *writer;
};
+class Rdb_key_field_iterator {
+ private:
+ Rdb_field_packing *m_pack_info;
+ int m_iter_index;
+ int m_iter_end;
+ TABLE *m_table;
+ Rdb_string_reader *m_reader;
+ Rdb_string_reader *m_unp_reader;
+ uint m_curr_bitmap_pos;
+ const MY_BITMAP *m_covered_bitmap;
+ uchar *m_buf;
+ bool m_has_unpack_info;
+ const Rdb_key_def *m_key_def;
+ bool m_secondary_key;
+ bool m_hidden_pk_exists;
+ bool m_is_hidden_pk;
+ bool m_is_null;
+ Field *m_field;
+ uint m_offset;
+ Rdb_field_packing *m_fpi;
+
+ public:
+ Rdb_key_field_iterator(const Rdb_key_field_iterator &) = delete;
+ Rdb_key_field_iterator &operator=(const Rdb_key_field_iterator &) = delete;
+ Rdb_key_field_iterator(const Rdb_key_def *key_def,
+ Rdb_field_packing *pack_info,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unp_reader, TABLE *table,
+ bool has_unpack_info, const MY_BITMAP *covered_bitmap,
+ uchar *buf);
+
+ int next();
+ bool has_next();
+ bool get_is_null() const;
+ Field *get_field() const;
+ int get_field_index() const;
+ void *get_dst() const;
+};
+
struct Rdb_collation_codec;
struct Rdb_index_info;
@@ -81,18 +142,19 @@ struct Rdb_index_info;
C-style "virtual table" allowing different handling of packing logic based
on the field type. See Rdb_field_packing::setup() implementation.
*/
-using rdb_make_unpack_info_t =
- void (Rdb_key_def::*)(const Rdb_collation_codec *codec, const Field *field,
- Rdb_pack_field_context *pack_ctx) const;
-using rdb_index_field_unpack_t = int (Rdb_key_def::*)(
- Rdb_field_packing *fpi, Field *field, uchar *field_ptr,
- Rdb_string_reader *reader, Rdb_string_reader *unpack_reader) const;
-using rdb_index_field_skip_t =
- int (Rdb_key_def::*)(const Rdb_field_packing *fpi, const Field *field,
- Rdb_string_reader *reader) const;
-using rdb_index_field_pack_t =
- void (Rdb_key_def::*)(Rdb_field_packing *fpi, Field *field, uchar *buf,
- uchar **dst, Rdb_pack_field_context *pack_ctx) const;
+using rdb_make_unpack_info_t = void (*)(const Rdb_collation_codec *codec,
+ const Field *field,
+ Rdb_pack_field_context *pack_ctx);
+using rdb_index_field_unpack_t = int (*)(Rdb_field_packing *fpi, Field *field,
+ uchar *field_ptr,
+ Rdb_string_reader *reader,
+ Rdb_string_reader *unpack_reader);
+using rdb_index_field_skip_t = int (*)(const Rdb_field_packing *fpi,
+ const Field *field,
+ Rdb_string_reader *reader);
+using rdb_index_field_pack_t = void (*)(Rdb_field_packing *fpi, Field *field,
+ uchar *buf, uchar **dst,
+ Rdb_pack_field_context *pack_ctx);
const uint RDB_INVALID_KEY_LEN = uint(-1);
@@ -187,7 +249,7 @@ enum {
*/
class Rdb_key_def {
-public:
+ public:
/* Convert a key from KeyTupleFormat to mem-comparable form */
uint pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer,
uchar *const packed_tuple, const uchar *const key_tuple,
@@ -202,23 +264,17 @@ public:
uint pack_record(const TABLE *const tbl, uchar *const pack_buffer,
const uchar *const record, uchar *const packed_tuple,
Rdb_string_writer *const unpack_info,
- const bool &should_store_row_debug_checksums,
- const longlong &hidden_pk_id = 0, uint n_key_parts = 0,
+ const bool should_store_row_debug_checksums,
+ const longlong hidden_pk_id = 0, uint n_key_parts = 0,
uint *const n_null_fields = nullptr,
- uint *const ttl_pk_offset = nullptr,
const char *const ttl_bytes = nullptr) const;
/* Pack the hidden primary key into mem-comparable form. */
- uint pack_hidden_pk(const longlong &hidden_pk_id,
+ uint pack_hidden_pk(const longlong hidden_pk_id,
uchar *const packed_tuple) const;
- int unpack_field(Rdb_field_packing *const fpi,
- Field *const field,
- Rdb_string_reader* reader,
- const uchar *const default_value,
- Rdb_string_reader* unp_reader) const;
int unpack_record(TABLE *const table, uchar *const buf,
const rocksdb::Slice *const packed_key,
const rocksdb::Slice *const unpack_info,
- const bool &verify_row_debug_checksums) const;
+ const bool verify_row_debug_checksums) const;
static bool unpack_info_has_checksum(const rocksdb::Slice &unpack_info);
int compare_keys(const rocksdb::Slice *key1, const rocksdb::Slice *key2,
@@ -240,33 +296,67 @@ public:
/*
Get the first key that you need to position at to start iterating.
-
Stores into *key a "supremum" or "infimum" key value for the index.
-
+ @parameters key OUT Big Endian, value is m_index_number or
+ m_index_number + 1
+ @parameters size OUT key size, value is INDEX_NUMBER_SIZE
@return Number of bytes in the key that are usable for bloom filter use.
*/
inline int get_first_key(uchar *const key, uint *const size) const {
- if (m_is_reverse_cf)
+ if (m_is_reverse_cf) {
get_supremum_key(key, size);
- else
+ /* Find out how many bytes of infimum are the same as m_index_number */
+ uchar unmodified_key[INDEX_NUMBER_SIZE];
+ rdb_netbuf_store_index(unmodified_key, m_index_number);
+ int i;
+ for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
+ if (key[i] != unmodified_key[i]) {
+ break;
+ }
+ }
+ return i;
+ } else {
get_infimum_key(key, size);
+ // For infimum key, its value will be m_index_number
+ // Thus return its own size instead.
+ return INDEX_NUMBER_SIZE;
+ }
+ }
+
+ /*
+ The same as get_first_key, but get the key for the last entry in the index
+ @parameters key OUT Big Endian, value is m_index_number or
+ m_index_number + 1
+ @parameters size OUT key size, value is INDEX_NUMBER_SIZE
- /* Find out how many bytes of infimum are the same as m_index_number */
- uchar unmodified_key[INDEX_NUMBER_SIZE];
- rdb_netbuf_store_index(unmodified_key, m_index_number);
- int i;
- for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
- if (key[i] != unmodified_key[i])
- break;
+ @return Number of bytes in the key that are usable for bloom filter use.
+ */
+ inline int get_last_key(uchar *const key, uint *const size) const {
+ if (m_is_reverse_cf) {
+ get_infimum_key(key, size);
+ // For infimum key, its value will be m_index_number
+ // Thus return its own size instead.
+ return INDEX_NUMBER_SIZE;
+ } else {
+ get_supremum_key(key, size);
+ /* Find out how many bytes are the same as m_index_number */
+ uchar unmodified_key[INDEX_NUMBER_SIZE];
+ rdb_netbuf_store_index(unmodified_key, m_index_number);
+ int i;
+ for (i = 0; i < INDEX_NUMBER_SIZE; i++) {
+ if (key[i] != unmodified_key[i]) {
+ break;
+ }
+ }
+ return i;
}
- return i;
}
/* Make a key that is right after the given key. */
- static int successor(uchar *const packed_tuple, const uint &len);
+ static int successor(uchar *const packed_tuple, const uint len);
/* Make a key that is right before the given key. */
- static int predecessor(uchar *const packed_tuple, const uint &len);
+ static int predecessor(uchar *const packed_tuple, const uint len);
/*
This can be used to compare prefixes.
@@ -282,19 +372,18 @@ public:
/* Check if given mem-comparable key belongs to this index */
bool covers_key(const rocksdb::Slice &slice) const {
- if (slice.size() < INDEX_NUMBER_SIZE)
- return false;
+ if (slice.size() < INDEX_NUMBER_SIZE) return false;
- if (memcmp(slice.data(), m_index_number_storage_form, INDEX_NUMBER_SIZE))
+ if (memcmp(slice.data(), m_index_number_storage_form, INDEX_NUMBER_SIZE)) {
return false;
+ }
return true;
}
void get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const;
- bool covers_lookup(TABLE *const table,
- const rocksdb::Slice *const unpack_info,
+ bool covers_lookup(const rocksdb::Slice *const unpack_info,
const MY_BITMAP *const map) const;
inline bool use_covered_bitmap_format() const {
@@ -302,6 +391,9 @@ public:
m_kv_format_version >= SECONDARY_FORMAT_VERSION_UPDATE3;
}
+ /* Indicates that all key parts can be unpacked to cover a secondary lookup */
+ bool can_cover_lookup() const;
+
/*
Return true if the passed mem-comparable key
- is from this index, and
@@ -339,7 +431,7 @@ public:
uint get_key_parts() const { return m_key_parts; }
- uint get_ttl_field_offset() const { return m_ttl_field_offset; }
+ uint get_ttl_field_index() const { return m_ttl_field_index; }
/*
Get a field object for key part #part_no
@@ -377,7 +469,7 @@ public:
VERSION_SIZE = 2,
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
- PACKED_SIZE = 4, // one int
+ PACKED_SIZE = 4, // one int
};
// bit flags for combining bools when writing to disk
@@ -505,7 +597,7 @@ public:
uint64 *ttl_duration);
static uint extract_ttl_col(const TABLE *const table_arg,
const Rdb_tbl_def *const tbl_def_arg,
- std::string *ttl_column, uint *ttl_field_offset,
+ std::string *ttl_column, uint *ttl_field_index,
bool skip_checks = false);
inline bool has_ttl() const { return m_ttl_duration > 0; }
@@ -517,15 +609,14 @@ public:
const uchar *const val,
enum INDEX_FLAG flag) const;
- static const std::string
- gen_qualifier_for_table(const char *const qualifier,
- const std::string &partition_name = "");
- static const std::string
- gen_cf_name_qualifier_for_partition(const std::string &s);
- static const std::string
- gen_ttl_duration_qualifier_for_partition(const std::string &s);
- static const std::string
- gen_ttl_col_qualifier_for_partition(const std::string &s);
+ static const std::string gen_qualifier_for_table(
+ const char *const qualifier, const std::string &partition_name = "");
+ static const std::string gen_cf_name_qualifier_for_partition(
+ const std::string &s);
+ static const std::string gen_ttl_duration_qualifier_for_partition(
+ const std::string &s);
+ static const std::string gen_ttl_col_qualifier_for_partition(
+ const std::string &s);
static const std::string parse_comment_for_qualifier(
const std::string &comment, const TABLE *const table_arg,
@@ -535,133 +626,133 @@ public:
rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; }
/* Check if keypart #kp can be unpacked from index tuple */
- inline bool can_unpack(const uint &kp) const;
+ inline bool can_unpack(const uint kp) const;
/* Check if keypart #kp needs unpack info */
- inline bool has_unpack_info(const uint &kp) const;
+ inline bool has_unpack_info(const uint kp) const;
/* Check if given table has a primary key */
static bool table_has_hidden_pk(const TABLE *const table);
- void report_checksum_mismatch(const bool &is_key, const char *const data,
+ void report_checksum_mismatch(const bool is_key, const char *const data,
const size_t data_size) const;
/* Check if index is at least pk_min if it is a PK,
or at least sk_min if SK.*/
- bool index_format_min_check(const int &pk_min, const int &sk_min) const;
+ bool index_format_min_check(const int pk_min, const int sk_min) const;
- void pack_with_make_sort_key(
+ static void pack_with_make_sort_key(
Rdb_field_packing *const fpi, Field *const field,
uchar *buf MY_ATTRIBUTE((__unused__)), uchar **dst,
- Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const;
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__)));
- void pack_with_varchar_encoding(
+ static void pack_with_varchar_encoding(
Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
- Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const;
-
- void
- pack_with_varchar_space_pad(Rdb_field_packing *const fpi, Field *const field,
- uchar *buf, uchar **dst,
- Rdb_pack_field_context *const pack_ctx) const;
-
- int unpack_integer(Rdb_field_packing *const fpi, Field *const field,
- uchar *const to, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) const;
-
- int unpack_double(Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
- Field *const field MY_ATTRIBUTE((__unused__)),
- uchar *const field_ptr, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) const;
-
- int unpack_float(Rdb_field_packing *const fpi,
- Field *const field MY_ATTRIBUTE((__unused__)),
- uchar *const field_ptr, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) const;
-
- int unpack_binary_str(Rdb_field_packing *const fpi, Field *const field,
- uchar *const to, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) const;
-
- int unpack_binary_or_utf8_varchar(
- Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__)));
+
+ static void pack_with_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx);
+
+ static int unpack_integer(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const to, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_double(
+ Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
+ Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_float(
+ Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const;
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
+
+ static int unpack_binary_str(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const to, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__)));
- int unpack_binary_or_utf8_varchar_space_pad(
+ static int unpack_binary_or_utf8_varchar(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const;
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
- int unpack_newdate(Rdb_field_packing *const fpi,
- Field *const field MY_ATTRIBUTE((__unused__)),
- uchar *const field_ptr, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) const;
+ static int unpack_binary_or_utf8_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader);
- int unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field,
- uchar *dst, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) const;
+ static int unpack_newdate(
+ Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__)));
- int unpack_unknown_varchar(Rdb_field_packing *const fpi, Field *const field,
+ static int unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field,
uchar *dst, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const;
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__)));
- int unpack_simple_varchar_space_pad(
+ static int unpack_unknown_varchar(Rdb_field_packing *const fpi,
+ Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader);
+
+ static int unpack_simple_varchar_space_pad(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
- Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const;
+ Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader);
- int unpack_simple(Rdb_field_packing *const fpi,
- Field *const field MY_ATTRIBUTE((__unused__)),
- uchar *const dst, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const;
+ static int unpack_simple(Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader);
- int unpack_unknown(Rdb_field_packing *const fpi, Field *const field,
- uchar *const dst, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) const;
+ static int unpack_unknown(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader);
- int unpack_floating_point(uchar *const dst, Rdb_string_reader *const reader,
- const size_t &size, const int &exp_digit,
- const uchar *const zero_pattern,
- const uchar *const zero_val,
- void (*swap_func)(uchar *, const uchar *)) const;
+ static int unpack_floating_point(uchar *const dst,
+ Rdb_string_reader *const reader,
+ const size_t size, const int exp_digit,
+ const uchar *const zero_pattern,
+ const uchar *const zero_val,
+ void (*swap_func)(uchar *, const uchar *));
- void make_unpack_simple_varchar(const Rdb_collation_codec *const codec,
- const Field *const field,
- Rdb_pack_field_context *const pack_ctx) const;
+ static void make_unpack_simple_varchar(
+ const Rdb_collation_codec *const codec, const Field *const field,
+ Rdb_pack_field_context *const pack_ctx);
- void make_unpack_simple(const Rdb_collation_codec *const codec,
- const Field *const field,
- Rdb_pack_field_context *const pack_ctx) const;
+ static void make_unpack_simple(const Rdb_collation_codec *const codec,
+ const Field *const field,
+ Rdb_pack_field_context *const pack_ctx);
- void make_unpack_unknown(
+ static void make_unpack_unknown(
const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_pack_field_context *const pack_ctx) const;
+ const Field *const field, Rdb_pack_field_context *const pack_ctx);
- void make_unpack_unknown_varchar(
+ static void make_unpack_unknown_varchar(
const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_pack_field_context *const pack_ctx) const;
+ const Field *const field, Rdb_pack_field_context *const pack_ctx);
- void dummy_make_unpack_info(
+ static void dummy_make_unpack_info(
const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
const Field *field MY_ATTRIBUTE((__unused__)),
- Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) const;
+ Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__)));
- int skip_max_length(const Rdb_field_packing *const fpi,
- const Field *const field MY_ATTRIBUTE((__unused__)),
- Rdb_string_reader *const reader) const;
+ static int skip_max_length(const Rdb_field_packing *const fpi,
+ const Field *const field
+ MY_ATTRIBUTE((__unused__)),
+ Rdb_string_reader *const reader);
- int skip_variable_length(
- const Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_string_reader *const reader) const;
+ static int skip_variable_length(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader);
- int skip_variable_space_pad(const Rdb_field_packing *const fpi,
- const Field *const field,
- Rdb_string_reader *const reader) const;
+ static int skip_variable_space_pad(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader);
inline bool use_legacy_varbinary_format() const {
return !index_format_min_check(PRIMARY_FORMAT_VERSION_UPDATE2,
@@ -674,11 +765,11 @@ public:
private:
#ifndef DBUG_OFF
- inline bool is_storage_available(const int &offset, const int &needed) const {
+ inline bool is_storage_available(const int offset, const int needed) const {
const int storage_length = static_cast<int>(max_storage_fmt_length());
return (storage_length - offset) >= needed;
}
-#endif // DBUG_OFF
+#endif // DBUG_OFF
/* Global number of this index (used as prefix in StorageFormat) */
const uint32 m_index_number;
@@ -687,15 +778,15 @@ public:
rocksdb::ColumnFamilyHandle *m_cf_handle;
- void pack_legacy_variable_format(const uchar *src, size_t src_len,
- uchar **dst) const;
+ static void pack_legacy_variable_format(const uchar *src, size_t src_len,
+ uchar **dst);
- void pack_variable_format(const uchar *src, size_t src_len,
- uchar **dst) const;
+ static void pack_variable_format(const uchar *src, size_t src_len,
+ uchar **dst);
- uint calc_unpack_legacy_variable_format(uchar flag, bool *done) const;
+ static uint calc_unpack_legacy_variable_format(uchar flag, bool *done);
- uint calc_unpack_variable_format(uchar flag, bool *done) const;
+ static uint calc_unpack_variable_format(uchar flag, bool *done);
public:
uint16_t m_index_dict_version;
@@ -734,8 +825,6 @@ public:
std::string m_ttl_column;
private:
- friend class Rdb_tbl_def; // for m_index_number above
-
/* Number of key parts in the primary key*/
uint m_pk_key_parts;
@@ -766,7 +855,7 @@ public:
Index of the TTL column in table->s->fields, if it exists.
Default is UINT_MAX to denote that it does not exist.
*/
- uint m_ttl_field_offset;
+ uint m_ttl_field_index;
/* Prefix extractor for the column family of the key definiton */
std::shared_ptr<const rocksdb::SliceTransform> m_prefix_extractor;
@@ -814,7 +903,7 @@ extern std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE>
rdb_collation_data;
class Rdb_field_packing {
-public:
+ public:
Rdb_field_packing(const Rdb_field_packing &) = delete;
Rdb_field_packing &operator=(const Rdb_field_packing &) = delete;
Rdb_field_packing() = default;
@@ -832,9 +921,10 @@ public:
Valid only for VARCHAR fields.
*/
const CHARSET_INFO *m_varchar_charset;
+ bool m_use_legacy_varbinary_format;
// (Valid when Variable Length Space Padded Encoding is used):
- uint m_segment_size; // size of segment used
+ uint m_segment_size; // size of segment used
// number of bytes used to store number of trimmed (or added)
// spaces in the upack_info
@@ -877,7 +967,7 @@ public:
*/
rdb_index_field_skip_t m_skip_func;
-private:
+ private:
/*
Location of the field in the table (key number and key part number).
@@ -903,12 +993,12 @@ private:
uint m_keynr;
uint m_key_part;
-public:
+ public:
bool setup(const Rdb_key_def *const key_descr, const Field *const field,
- const uint &keynr_arg, const uint &key_part_arg,
- const uint16 &key_length);
+ const uint keynr_arg, const uint key_part_arg,
+ const uint16 key_length);
Field *get_field_in_table(const TABLE *const tbl) const;
- void fill_hidden_pk_val(uchar **dst, const longlong &hidden_pk_id) const;
+ void fill_hidden_pk_val(uchar **dst, const longlong hidden_pk_id) const;
};
/*
@@ -919,7 +1009,7 @@ public:
For encoding/decoding of index tuples, see Rdb_key_def.
*/
class Rdb_field_encoder {
-public:
+ public:
Rdb_field_encoder(const Rdb_field_encoder &) = delete;
Rdb_field_encoder &operator=(const Rdb_field_encoder &) = delete;
/*
@@ -940,7 +1030,7 @@ public:
uint m_null_offset;
uint16 m_field_index;
- uchar m_null_mask; // 0 means the field cannot be null
+ uchar m_null_mask; // 0 means the field cannot be null
my_core::enum_field_types m_field_type;
@@ -960,12 +1050,12 @@ inline Field *Rdb_key_def::get_table_field_for_part_no(TABLE *table,
return m_pack_info[part_no].get_field_in_table(table);
}
-inline bool Rdb_key_def::can_unpack(const uint &kp) const {
+inline bool Rdb_key_def::can_unpack(const uint kp) const {
DBUG_ASSERT(kp < m_key_parts);
return (m_pack_info[kp].m_unpack_func != nullptr);
}
-inline bool Rdb_key_def::has_unpack_info(const uint &kp) const {
+inline bool Rdb_key_def::has_unpack_info(const uint kp) const {
DBUG_ASSERT(kp < m_key_parts);
return m_pack_info[kp].uses_unpack_info();
}
@@ -980,7 +1070,7 @@ inline bool Rdb_key_def::has_unpack_info(const uint &kp) const {
*/
class Rdb_tbl_def {
-private:
+ private:
void check_if_is_mysql_system_table();
/* Stores 'dbname.tablename' */
@@ -993,7 +1083,7 @@ private:
void set_name(const std::string &name);
-public:
+ public:
Rdb_tbl_def(const Rdb_tbl_def &) = delete;
Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete;
@@ -1002,18 +1092,20 @@ public:
set_name(name);
}
- Rdb_tbl_def(const char *const name, const size_t &len)
+ Rdb_tbl_def(const char *const name, const size_t len)
: m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(std::string(name, len));
}
- explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0)
+ explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t pos = 0)
: m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
set_name(std::string(slice.data() + pos, slice.size() - pos));
}
~Rdb_tbl_def();
+ void check_and_set_read_free_rpl_table();
+
/* Number of indexes */
uint m_key_count;
@@ -1026,8 +1118,11 @@ public:
/* Is this a system table */
bool m_is_mysql_system_table;
+ /* Is this table read free repl enabled */
+ std::atomic_bool m_is_read_free_rpl_table{false};
+
bool put_dict(Rdb_dict_manager *const dict, rocksdb::WriteBatch *const batch,
- uchar *const key, const size_t &keylen);
+ const rocksdb::Slice &key);
const std::string &full_tablename() const { return m_dbname_tablename; }
const std::string &base_dbname() const { return m_dbname; }
@@ -1046,12 +1141,12 @@ class Rdb_seq_generator {
mysql_mutex_t m_mutex;
-public:
+ public:
Rdb_seq_generator(const Rdb_seq_generator &) = delete;
Rdb_seq_generator &operator=(const Rdb_seq_generator &) = delete;
Rdb_seq_generator() = default;
- void init(const uint &initial_number) {
+ void init(const uint initial_number) {
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
m_next_number = initial_number;
}
@@ -1076,14 +1171,17 @@ interface Rdb_tables_scanner {
class Rdb_ddl_manager {
Rdb_dict_manager *m_dict = nullptr;
- my_core::HASH m_ddl_hash; // Contains Rdb_tbl_def elements
+
+ // Contains Rdb_tbl_def elements
+ std::unordered_map<std::string, Rdb_tbl_def *> m_ddl_map;
+
// Maps index id to <table_name, index number>
std::map<GL_INDEX_ID, std::pair<std::string, uint>> m_index_num_to_keydef;
// Maps index id to key definitons not yet committed to data dictionary.
// This is mainly used to store key definitions during ALTER TABLE.
std::map<GL_INDEX_ID, std::shared_ptr<Rdb_key_def>>
- m_index_num_to_uncommitted_keydef;
+ m_index_num_to_uncommitted_keydef;
mysql_rwlock_t m_rwlock;
Rdb_seq_generator m_sequence;
@@ -1094,30 +1192,30 @@ class Rdb_ddl_manager {
const std::shared_ptr<Rdb_key_def> &find(GL_INDEX_ID gl_index_id);
-public:
+ public:
Rdb_ddl_manager(const Rdb_ddl_manager &) = delete;
Rdb_ddl_manager &operator=(const Rdb_ddl_manager &) = delete;
Rdb_ddl_manager() {}
/* Load the data dictionary from on-disk storage */
bool init(Rdb_dict_manager *const dict_arg, Rdb_cf_manager *const cf_manager,
- const uint32_t &validate_tables);
+ const uint32_t validate_tables);
void cleanup();
- Rdb_tbl_def *find(const std::string &table_name, const bool &lock = true);
+ Rdb_tbl_def *find(const std::string &table_name, const bool lock = true);
std::shared_ptr<const Rdb_key_def> safe_find(GL_INDEX_ID gl_index_id);
void set_stats(const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats);
void adjust_stats(const std::vector<Rdb_index_stats> &new_data,
const std::vector<Rdb_index_stats> &deleted_data =
std::vector<Rdb_index_stats>());
- void persist_stats(const bool &sync = false);
+ void persist_stats(const bool sync = false);
/* Modify the mapping and write it to on-disk storage */
int put_and_write(Rdb_tbl_def *const key_descr,
rocksdb::WriteBatch *const batch);
void remove(Rdb_tbl_def *const rec, rocksdb::WriteBatch *const batch,
- const bool &lock = true);
+ const bool lock = true);
bool rename(const std::string &from, const std::string &to,
rocksdb::WriteBatch *const batch);
@@ -1136,9 +1234,9 @@ public:
void remove_uncommitted_keydefs(
const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes);
-private:
+ private:
/* Put the data into in-memory table (only) */
- int put(Rdb_tbl_def *const key_descr, const bool &lock = true);
+ int put(Rdb_tbl_def *const key_descr, const bool lock = true);
/* Helper functions to be passed to my_core::HASH object */
static const uchar *get_hash_key(Rdb_tbl_def *const rec, size_t *const length,
@@ -1166,7 +1264,7 @@ private:
binlog_gtid
*/
class Rdb_binlog_manager {
-public:
+ public:
Rdb_binlog_manager(const Rdb_binlog_manager &) = delete;
Rdb_binlog_manager &operator=(const Rdb_binlog_manager &) = delete;
Rdb_binlog_manager() = default;
@@ -1177,18 +1275,15 @@ public:
rocksdb::WriteBatchBase *const batch);
bool read(char *const binlog_name, my_off_t *const binlog_pos,
char *const binlog_gtid) const;
- void update_slave_gtid_info(const uint &id, const char *const db,
+ void update_slave_gtid_info(const uint id, const char *const db,
const char *const gtid,
rocksdb::WriteBatchBase *const write_batch);
-private:
+ private:
Rdb_dict_manager *m_dict = nullptr;
- uchar m_key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0};
+ Rdb_buf_writer<Rdb_key_def::INDEX_NUMBER_SIZE> m_key_writer;
rocksdb::Slice m_key_slice;
- rocksdb::Slice pack_value(uchar *const buf, const char *const binlog_name,
- const my_off_t &binlog_pos,
- const char *const binlog_gtid) const;
bool unpack_value(const uchar *const value, size_t value_size,
char *const binlog_name,
my_off_t *const binlog_pos, char *const binlog_gtid) const;
@@ -1255,7 +1350,7 @@ private:
*/
class Rdb_dict_manager {
-private:
+ private:
mysql_mutex_t m_mutex;
rocksdb::TransactionDB *m_db = nullptr;
rocksdb::ColumnFamilyHandle *m_system_cfh = nullptr;
@@ -1267,18 +1362,27 @@ private:
static void dump_index_id(uchar *const netbuf,
Rdb_key_def::DATA_DICT_TYPE dict_type,
const GL_INDEX_ID &gl_index_id);
+ template <size_t T>
+ static void dump_index_id(Rdb_buf_writer<T> *buf_writer,
+ Rdb_key_def::DATA_DICT_TYPE dict_type,
+ const GL_INDEX_ID &gl_index_id) {
+ buf_writer->write_uint32(dict_type);
+ buf_writer->write_uint32(gl_index_id.cf_id);
+ buf_writer->write_uint32(gl_index_id.index_id);
+ }
+
void delete_with_prefix(rocksdb::WriteBatch *const batch,
Rdb_key_def::DATA_DICT_TYPE dict_type,
const GL_INDEX_ID &gl_index_id) const;
/* Functions for fast DROP TABLE/INDEX */
void resume_drop_indexes() const;
void log_start_drop_table(const std::shared_ptr<Rdb_key_def> *const key_descr,
- const uint32 &n_keys,
+ const uint32 n_keys,
const char *const log_action) const;
void log_start_drop_index(GL_INDEX_ID gl_index_id,
const char *log_action) const;
-public:
+ public:
Rdb_dict_manager(const Rdb_dict_manager &) = delete;
Rdb_dict_manager &operator=(const Rdb_dict_manager &) = delete;
Rdb_dict_manager() = default;
@@ -1298,7 +1402,7 @@ public:
/* Raw RocksDB operations */
std::unique_ptr<rocksdb::WriteBatch> begin() const;
- int commit(rocksdb::WriteBatch *const batch, const bool &sync = true) const;
+ int commit(rocksdb::WriteBatch *const batch, const bool sync = true) const;
rocksdb::Status get_value(const rocksdb::Slice &key,
std::string *const value) const;
void put_key(rocksdb::WriteBatchBase *const batch, const rocksdb::Slice &key,
@@ -1308,23 +1412,23 @@ public:
rocksdb::Iterator *new_iterator() const;
/* Internal Index id => CF */
- void
- add_or_update_index_cf_mapping(rocksdb::WriteBatch *batch,
- struct Rdb_index_info *const index_info) const;
+ void add_or_update_index_cf_mapping(
+ rocksdb::WriteBatch *batch,
+ struct Rdb_index_info *const index_info) const;
void delete_index_info(rocksdb::WriteBatch *batch,
const GL_INDEX_ID &index_id) const;
bool get_index_info(const GL_INDEX_ID &gl_index_id,
struct Rdb_index_info *const index_info) const;
/* CF id => CF flags */
- void add_cf_flags(rocksdb::WriteBatch *const batch, const uint &cf_id,
- const uint &cf_flags) const;
- bool get_cf_flags(const uint &cf_id, uint *const cf_flags) const;
+ void add_cf_flags(rocksdb::WriteBatch *const batch, const uint cf_id,
+ const uint cf_flags) const;
+ bool get_cf_flags(const uint cf_id, uint *const cf_flags) const;
/* Functions for fast CREATE/DROP TABLE/INDEX */
- void
- get_ongoing_index_operation(std::unordered_set<GL_INDEX_ID> *gl_index_ids,
- Rdb_key_def::DATA_DICT_TYPE dd_type) const;
+ void get_ongoing_index_operation(
+ std::unordered_set<GL_INDEX_ID> *gl_index_ids,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const;
bool is_index_operation_ongoing(const GL_INDEX_ID &gl_index_id,
Rdb_key_def::DATA_DICT_TYPE dd_type) const;
void start_ongoing_index_operation(rocksdb::WriteBatch *batch,
@@ -1335,15 +1439,15 @@ public:
Rdb_key_def::DATA_DICT_TYPE dd_type) const;
bool is_drop_index_empty() const;
void add_drop_table(std::shared_ptr<Rdb_key_def> *const key_descr,
- const uint32 &n_keys,
+ const uint32 n_keys,
rocksdb::WriteBatch *const batch) const;
void add_drop_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
rocksdb::WriteBatch *const batch) const;
void add_create_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
rocksdb::WriteBatch *const batch) const;
- void
- finish_indexes_operation(const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
- Rdb_key_def::DATA_DICT_TYPE dd_type) const;
+ void finish_indexes_operation(
+ const std::unordered_set<GL_INDEX_ID> &gl_index_ids,
+ Rdb_key_def::DATA_DICT_TYPE dd_type) const;
void rollback_ongoing_index_creation() const;
inline void get_ongoing_drop_indexes(
@@ -1386,7 +1490,7 @@ public:
bool get_max_index_id(uint32_t *const index_id) const;
bool update_max_index_id(rocksdb::WriteBatch *const batch,
- const uint32_t &index_id) const;
+ const uint32_t index_id) const;
void add_stats(rocksdb::WriteBatch *const batch,
const std::vector<Rdb_index_stats> &stats) const;
Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const;
@@ -1513,4 +1617,4 @@ class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator {
bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs);
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_global.h b/storage/rocksdb/rdb_global.h
new file mode 100644
index 00000000000..7213571bf61
--- /dev/null
+++ b/storage/rocksdb/rdb_global.h
@@ -0,0 +1,392 @@
+/*
+ Copyright (c) 2018, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* MyRocks global type definitions go here */
+
+#pragma once
+
+/* C++ standard header files */
+#include <limits>
+#include <string>
+#include <vector>
+
+/* MySQL header files */
+#include "./handler.h" /* handler */
+#include "./my_global.h" /* ulonglong */
+#include "./sql_string.h"
+#include "./ut0counter.h"
+
+namespace myrocks {
+/*
+ * Struct for exporting transaction information for
+ * information_schema.rocksdb_trx
+ */
+struct Rdb_trx_info {
+ std::string name;
+ ulonglong trx_id;
+ ulonglong write_count;
+ ulonglong lock_count;
+ int timeout_sec;
+ std::string state;
+ std::string waiting_key;
+ ulonglong waiting_cf_id;
+ int is_replication;
+ int skip_trx_api;
+ int read_only;
+ int deadlock_detect;
+ int num_ongoing_bulk_load;
+ ulong thread_id;
+ std::string query_str;
+};
+
+std::vector<Rdb_trx_info> rdb_get_all_trx_info();
+
+/*
+ * Struct for exporting deadlock transaction information for
+ * information_schema.rocksdb_deadlock
+ */
+struct Rdb_deadlock_info {
+ struct Rdb_dl_trx_info {
+ ulonglong trx_id;
+ std::string cf_name;
+ std::string waiting_key;
+ bool exclusive_lock;
+ std::string index_name;
+ std::string table_name;
+ };
+ std::vector<Rdb_dl_trx_info> path;
+ int64_t deadlock_time;
+ ulonglong victim_trx_id;
+};
+
+std::vector<Rdb_deadlock_info> rdb_get_deadlock_info();
+
+/*
+ This is
+ - the name of the default Column Family (the CF which stores indexes which
+ didn't explicitly specify which CF they are in)
+ - the name used to set the default column family parameter for per-cf
+ arguments.
+*/
+extern const std::string DEFAULT_CF_NAME;
+
+/*
+ This is the name of the Column Family used for storing the data dictionary.
+*/
+extern const std::string DEFAULT_SYSTEM_CF_NAME;
+
+/*
+ This is the name of the hidden primary key for tables with no pk.
+*/
+const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID";
+
+/*
+ Column family name which means "put this index into its own column family".
+ DEPRECATED!!!
+*/
+extern const std::string PER_INDEX_CF_NAME;
+
+/*
+ Name for the background thread.
+*/
+const char *const BG_THREAD_NAME = "myrocks-bg";
+
+/*
+ Name for the drop index thread.
+*/
+const char *const INDEX_THREAD_NAME = "myrocks-index";
+
+/*
+ Name for the manual compaction thread.
+*/
+const char *const MANUAL_COMPACTION_THREAD_NAME = "myrocks-mc";
+
+/*
+ Separator between partition name and the qualifier. Sample usage:
+
+ - p0_cfname=foo
+ - p3_tts_col=bar
+*/
+const char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_';
+
+/*
+ Separator between qualifier name and value. Sample usage:
+
+ - p0_cfname=foo
+ - p3_tts_col=bar
+*/
+const char RDB_QUALIFIER_VALUE_SEP = '=';
+
+/*
+ Separator between multiple qualifier assignments. Sample usage:
+
+ - p0_cfname=foo;p1_cfname=bar;p2_cfname=baz
+*/
+const char RDB_QUALIFIER_SEP = ';';
+
+/*
+ Qualifier name for a custom per partition column family.
+*/
+const char *const RDB_CF_NAME_QUALIFIER = "cfname";
+
+/*
+ Qualifier name for a custom per partition ttl duration.
+*/
+const char *const RDB_TTL_DURATION_QUALIFIER = "ttl_duration";
+
+/*
+  Qualifier name for a custom per partition ttl column.
+*/
+const char *const RDB_TTL_COL_QUALIFIER = "ttl_col";
+
+/*
+ Default, minimal valid, and maximum valid sampling rate values when collecting
+ statistics about table.
+*/
+#define RDB_DEFAULT_TBL_STATS_SAMPLE_PCT 10
+#define RDB_TBL_STATS_SAMPLE_PCT_MIN 1
+#define RDB_TBL_STATS_SAMPLE_PCT_MAX 100
+
+/*
+ Default and maximum values for rocksdb-compaction-sequential-deletes and
+ rocksdb-compaction-sequential-deletes-window to add basic boundary checking.
+*/
+#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES 0
+#define MAX_COMPACTION_SEQUENTIAL_DELETES 2000000
+
+#define DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW 0
+#define MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW 2000000
+
+/*
+ Default and maximum values for various compaction and flushing related
+ options. Numbers are based on the hardware we currently use and our internal
+ benchmarks which indicate that parallelization helps with the speed of
+ compactions.
+
+ Ideally of course we'll use heuristic technique to determine the number of
+ CPU-s and derive the values from there. This however has its own set of
+ problems and we'll choose simplicity for now.
+*/
+#define MAX_BACKGROUND_JOBS 64
+
+#define DEFAULT_SUBCOMPACTIONS 1
+#define MAX_SUBCOMPACTIONS 64
+
+/*
+ Default value for rocksdb_sst_mgr_rate_bytes_per_sec = 0 (disabled).
+*/
+#define DEFAULT_SST_MGR_RATE_BYTES_PER_SEC 0
+
+/*
+ Defines the field sizes for serializing XID object to a string representation.
+ string byte format: [field_size: field_value, ...]
+ [
+ 8: XID.formatID,
+ 1: XID.gtrid_length,
+ 1: XID.bqual_length,
+ XID.gtrid_length + XID.bqual_length: XID.data
+ ]
+*/
+#define RDB_FORMATID_SZ 8
+#define RDB_GTRID_SZ 1
+#define RDB_BQUAL_SZ 1
+#define RDB_XIDHDR_LEN (RDB_FORMATID_SZ + RDB_GTRID_SZ + RDB_BQUAL_SZ)
+
+/*
+ To fix an unhandled exception we specify the upper bound as LONGLONGMAX
+ instead of ULONGLONGMAX because the latter is -1 and causes an exception when
+ cast to jlong (signed) of JNI
+
+ The reason behind the cast issue is the lack of unsigned int support in Java.
+*/
+#define MAX_RATE_LIMITER_BYTES_PER_SEC static_cast<uint64_t>(LLONG_MAX)
+
+/*
+ Hidden PK column (for tables with no primary key) is a longlong (aka 8 bytes).
+ static_assert() in code will validate this assumption.
+*/
+#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong)
+
+/*
+ Bytes used to store TTL, in the beginning of all records for tables with TTL
+ enabled.
+*/
+#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
+
+#define ROCKSDB_SIZEOF_AUTOINC_VALUE sizeof(longlong)
+
+/*
+ Maximum index prefix length in bytes.
+*/
+#define MAX_INDEX_COL_LEN_LARGE 3072
+#define MAX_INDEX_COL_LEN_SMALL 767
+
+/*
+ MyRocks specific error codes. NB! Please make sure that you will update
+ HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in
+ rdb_error_messages to include any new error messages.
+*/
+#define HA_ERR_ROCKSDB_FIRST (HA_ERR_LAST + 1)
+#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0)
+#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \
+ (HA_ERR_ROCKSDB_FIRST + 1)
+#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \
+ (HA_ERR_ROCKSDB_FIRST + 2)
+#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3)
+#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4)
+#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5)
+#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6)
+#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7)
+#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8)
+#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9)
+/*
+  Each error code below maps to a RocksDB status code found in:
+  rocksdb/include/rocksdb/status.h. NOTE(review): (HA_ERR_LAST + 10) equals
+  (HA_ERR_ROCKSDB_FIRST + 9) above -- STATUS_NOT_FOUND collides with MERGE_FILE_ERR. */
+#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_LAST + 10)
+#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_LAST + 11)
+#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_LAST + 12)
+#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_LAST + 13)
+#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_LAST + 14)
+#define HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_LAST + 15)
+#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_LAST + 16)
+#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_LAST + 17)
+#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_LAST + 18)
+#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_LAST + 19)
+#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_LAST + 20)
+#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_LAST + 21)
+#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_LAST + 22)
+#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_LAST + 23)
+#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_LAST + 24)
+#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_LAST + 25)
+#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN
+
+const char *const rocksdb_hton_name = "ROCKSDB";
+
+typedef struct _gl_index_id_s {
+ uint32_t cf_id;
+ uint32_t index_id;
+ bool operator==(const struct _gl_index_id_s &other) const {
+ return cf_id == other.cf_id && index_id == other.index_id;
+ }
+ bool operator!=(const struct _gl_index_id_s &other) const {
+ return cf_id != other.cf_id || index_id != other.index_id;
+ }
+ bool operator<(const struct _gl_index_id_s &other) const {
+ return cf_id < other.cf_id ||
+ (cf_id == other.cf_id && index_id < other.index_id);
+ }
+ bool operator<=(const struct _gl_index_id_s &other) const {
+ return cf_id < other.cf_id ||
+ (cf_id == other.cf_id && index_id <= other.index_id);
+ }
+ bool operator>(const struct _gl_index_id_s &other) const {
+ return cf_id > other.cf_id ||
+ (cf_id == other.cf_id && index_id > other.index_id);
+ }
+ bool operator>=(const struct _gl_index_id_s &other) const {
+ return cf_id > other.cf_id ||
+ (cf_id == other.cf_id && index_id >= other.index_id);
+ }
+} GL_INDEX_ID;
+
+enum operation_type : int {
+ ROWS_DELETED = 0,
+ ROWS_INSERTED,
+ ROWS_READ,
+ ROWS_UPDATED,
+ ROWS_DELETED_BLIND,
+ ROWS_EXPIRED,
+ ROWS_FILTERED,
+ ROWS_HIDDEN_NO_SNAPSHOT,
+ ROWS_MAX
+};
+
+enum query_type : int { QUERIES_POINT = 0, QUERIES_RANGE, QUERIES_MAX };
+
+#if defined(HAVE_SCHED_GETCPU)
+#define RDB_INDEXER get_sched_indexer_t
+#else
+#define RDB_INDEXER thread_id_indexer_t
+#endif
+
+/* Global statistics struct used inside MyRocks */
+struct st_global_stats {
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> rows[ROWS_MAX];
+
+ // system_rows_ stats are only for system
+ // tables. They are not counted in rows_* stats.
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups;
+};
+
+/* Struct used for exporting status to MySQL */
+struct st_export_stats {
+ ulonglong rows_deleted;
+ ulonglong rows_inserted;
+ ulonglong rows_read;
+ ulonglong rows_updated;
+ ulonglong rows_deleted_blind;
+ ulonglong rows_expired;
+ ulonglong rows_filtered;
+ ulonglong rows_hidden_no_snapshot;
+
+ ulonglong system_rows_deleted;
+ ulonglong system_rows_inserted;
+ ulonglong system_rows_read;
+ ulonglong system_rows_updated;
+
+ ulonglong queries_point;
+ ulonglong queries_range;
+
+ ulonglong covered_secondary_key_lookups;
+};
+
+/* Struct used for exporting RocksDB memory status */
+struct st_memory_stats {
+ ulonglong memtable_total;
+ ulonglong memtable_unflushed;
+};
+
+/* Struct used for exporting RocksDB IO stalls stats */
+struct st_io_stall_stats {
+ ulonglong level0_slowdown;
+ ulonglong level0_slowdown_with_compaction;
+ ulonglong level0_numfiles;
+ ulonglong level0_numfiles_with_compaction;
+ ulonglong stop_for_pending_compaction_bytes;
+ ulonglong slowdown_for_pending_compaction_bytes;
+ ulonglong memtable_compaction;
+ ulonglong memtable_slowdown;
+ ulonglong total_stop;
+ ulonglong total_slowdown;
+
+ st_io_stall_stats()
+ : level0_slowdown(0),
+ level0_slowdown_with_compaction(0),
+ level0_numfiles(0),
+ level0_numfiles_with_compaction(0),
+ stop_for_pending_compaction_bytes(0),
+ slowdown_for_pending_compaction_bytes(0),
+ memtable_compaction(0),
+ memtable_slowdown(0),
+ total_stop(0),
+ total_slowdown(0) {}
+};
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
index 2f41c1e8a9f..c4b2f4d3bb6 100644
--- a/storage/rocksdb/rdb_i_s.cc
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -52,10 +52,10 @@ namespace myrocks {
engine.
*/
-#define ROCKSDB_FIELD_INFO(_name_, _len_, _type_, _flag_) \
+#define ROCKSDB_FIELD_INFO(_name_, _len_, _type_, _flag_) \
{ _name_, _len_, _type_, 0, _flag_, nullptr, 0 }
-#define ROCKSDB_FIELD_INFO_END \
+#define ROCKSDB_FIELD_INFO_END \
ROCKSDB_FIELD_INFO(nullptr, 0, MYSQL_TYPE_NULL, 0)
/*
@@ -63,7 +63,7 @@ namespace myrocks {
*/
namespace RDB_CFSTATS_FIELD {
enum { CF_NAME = 0, STAT_TYPE, VALUE };
-} // namespace RDB_CFSTATS_FIELD
+} // namespace RDB_CFSTATS_FIELD
static ST_FIELD_INFO rdb_i_s_cfstats_fields_info[] = {
ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
@@ -162,7 +162,7 @@ static int rdb_i_s_cfstats_init(void *p) {
*/
namespace RDB_DBSTATS_FIELD {
enum { STAT_TYPE = 0, VALUE };
-} // namespace RDB_DBSTATS_FIELD
+} // namespace RDB_DBSTATS_FIELD
static ST_FIELD_INFO rdb_i_s_dbstats_fields_info[] = {
ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
@@ -258,7 +258,7 @@ static int rdb_i_s_dbstats_init(void *const p) {
*/
namespace RDB_PERF_CONTEXT_FIELD {
enum { TABLE_SCHEMA = 0, TABLE_NAME, PARTITION_NAME, STAT_TYPE, VALUE };
-} // namespace RDB_PERF_CONTEXT_FIELD
+} // namespace RDB_PERF_CONTEXT_FIELD
static ST_FIELD_INFO rdb_i_s_perf_context_fields_info[] = {
ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
@@ -361,7 +361,7 @@ static int rdb_i_s_perf_context_init(void *const p) {
*/
namespace RDB_PERF_CONTEXT_GLOBAL_FIELD {
enum { STAT_TYPE = 0, VALUE };
-} // namespace RDB_PERF_CONTEXT_GLOBAL_FIELD
+} // namespace RDB_PERF_CONTEXT_GLOBAL_FIELD
static ST_FIELD_INFO rdb_i_s_perf_context_global_fields_info[] = {
ROCKSDB_FIELD_INFO("STAT_TYPE", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
@@ -431,7 +431,7 @@ static int rdb_i_s_perf_context_global_init(void *const p) {
*/
namespace RDB_CFOPTIONS_FIELD {
enum { CF_NAME = 0, OPTION_TYPE, VALUE };
-} // namespace RDB_CFOPTIONS_FIELD
+} // namespace RDB_CFOPTIONS_FIELD
static ST_FIELD_INFO rdb_i_s_cfoptions_fields_info[] = {
ROCKSDB_FIELD_INFO("CF_NAME", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
@@ -591,20 +591,20 @@ static int rdb_i_s_cfoptions_fill_table(
// get COMPACTION_STYLE option
switch (opts.compaction_style) {
- case rocksdb::kCompactionStyleLevel:
- val = "kCompactionStyleLevel";
- break;
- case rocksdb::kCompactionStyleUniversal:
- val = "kCompactionStyleUniversal";
- break;
- case rocksdb::kCompactionStyleFIFO:
- val = "kCompactionStyleFIFO";
- break;
- case rocksdb::kCompactionStyleNone:
- val = "kCompactionStyleNone";
- break;
- default:
- val = "NULL";
+ case rocksdb::kCompactionStyleLevel:
+ val = "kCompactionStyleLevel";
+ break;
+ case rocksdb::kCompactionStyleUniversal:
+ val = "kCompactionStyleUniversal";
+ break;
+ case rocksdb::kCompactionStyleFIFO:
+ val = "kCompactionStyleFIFO";
+ break;
+ case rocksdb::kCompactionStyleNone:
+ val = "kCompactionStyleNone";
+ break;
+ default:
+ val = "NULL";
}
cf_option_types.push_back({"COMPACTION_STYLE", val});
@@ -627,14 +627,14 @@ static int rdb_i_s_cfoptions_fill_table(
val.append("; STOP_STYLE=");
switch (compac_opts.stop_style) {
- case rocksdb::kCompactionStopStyleSimilarSize:
- val.append("kCompactionStopStyleSimilarSize}");
- break;
- case rocksdb::kCompactionStopStyleTotalSize:
- val.append("kCompactionStopStyleTotalSize}");
- break;
- default:
- val.append("}");
+ case rocksdb::kCompactionStopStyleSimilarSize:
+ val.append("kCompactionStopStyleSimilarSize}");
+ break;
+ case rocksdb::kCompactionStopStyleTotalSize:
+ val.append("kCompactionStopStyleTotalSize}");
+ break;
+ default:
+ val.append("}");
}
cf_option_types.push_back({"COMPACTION_OPTIONS_UNIVERSAL", val});
@@ -793,10 +793,11 @@ static int rdb_i_s_global_info_fill_table(
if (!dict_manager->get_cf_flags(cf_handle->GetID(), &flags)) {
// NO_LINT_DEBUG
- sql_print_error("RocksDB: Failed to get column family flags "
- "from CF with id = %u. MyRocks data dictionary may "
- "be corrupted.",
- cf_handle->GetID());
+ sql_print_error(
+ "RocksDB: Failed to get column family flags "
+ "from CF with id = %u. MyRocks data dictionary may "
+ "be corrupted.",
+ cf_handle->GetID());
abort();
}
@@ -905,7 +906,7 @@ static ST_FIELD_INFO rdb_i_s_compact_stats_fields_info[] = {
ROCKSDB_FIELD_INFO("VALUE", sizeof(double), MYSQL_TYPE_DOUBLE, 0),
ROCKSDB_FIELD_INFO_END};
-namespace // anonymous namespace = not visible outside this source file
+namespace // anonymous namespace = not visible outside this source file
{
struct Rdb_ddl_scanner : public Rdb_tables_scanner {
my_core::THD *m_thd;
@@ -913,7 +914,7 @@ struct Rdb_ddl_scanner : public Rdb_tables_scanner {
int add_table(Rdb_tbl_def *tdef) override;
};
-} // anonymous namespace
+} // anonymous namespace
/*
Support for INFORMATION_SCHEMA.ROCKSDB_DDL dynamic table
@@ -933,7 +934,7 @@ enum {
CF,
AUTO_INCREMENT
};
-} // namespace RDB_DDL_FIELD
+} // namespace RDB_DDL_FIELD
static ST_FIELD_INFO rdb_i_s_ddl_fields_info[] = {
ROCKSDB_FIELD_INFO("TABLE_SCHEMA", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
@@ -1008,8 +1009,7 @@ int Rdb_ddl_scanner::add_table(Rdb_tbl_def *tdef) {
}
ret = my_core::schema_table_store_record(m_thd, m_table);
- if (ret)
- return ret;
+ if (ret) return ret;
}
return HA_EXIT_SUCCESS;
}
@@ -1146,7 +1146,11 @@ enum {
TOP_LEVEL_INDEX_SIZE,
FILTER_BLOCK_SIZE,
COMPRESSION_ALGO,
- CREATION_TIME
+ CREATION_TIME,
+ FILE_CREATION_TIME,
+ OLDEST_KEY_TIME,
+ FILTER_POLICY,
+ COMPRESSION_OPTIONS,
};
} // namespace RDB_SST_PROPS_FIELD
@@ -1171,6 +1175,13 @@ static ST_FIELD_INFO rdb_i_s_sst_props_fields_info[] = {
ROCKSDB_FIELD_INFO("COMPRESSION_ALGO", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
ROCKSDB_FIELD_INFO("CREATION_TIME", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
0),
+ ROCKSDB_FIELD_INFO("FILE_CREATION_TIME", sizeof(int64_t),
+ MYSQL_TYPE_LONGLONG, 0),
+ ROCKSDB_FIELD_INFO("OLDEST_KEY_TIME", sizeof(int64_t), MYSQL_TYPE_LONGLONG,
+ 0),
+ ROCKSDB_FIELD_INFO("FILTER_POLICY", NAME_LEN + 1, MYSQL_TYPE_STRING, 0),
+ ROCKSDB_FIELD_INFO("COMPRESSION_OPTIONS", NAME_LEN + 1, MYSQL_TYPE_STRING,
+ 0),
ROCKSDB_FIELD_INFO_END};
static int rdb_i_s_sst_props_fill_table(
@@ -1243,6 +1254,24 @@ static int rdb_i_s_sst_props_fill_table(
}
field[RDB_SST_PROPS_FIELD::CREATION_TIME]->store(
props.second->creation_time, true);
+ field[RDB_SST_PROPS_FIELD::FILE_CREATION_TIME]->store(
+ props.second->file_creation_time, true);
+ field[RDB_SST_PROPS_FIELD::OLDEST_KEY_TIME]->store(
+ props.second->oldest_key_time, true);
+ if (props.second->filter_policy_name.empty()) {
+ field[RDB_SST_PROPS_FIELD::FILTER_POLICY]->set_null();
+ } else {
+ field[RDB_SST_PROPS_FIELD::FILTER_POLICY]->store(
+ props.second->filter_policy_name.c_str(),
+ props.second->filter_policy_name.size(), system_charset_info);
+ }
+ if (props.second->compression_options.empty()) {
+ field[RDB_SST_PROPS_FIELD::COMPRESSION_OPTIONS]->set_null();
+ } else {
+ field[RDB_SST_PROPS_FIELD::COMPRESSION_OPTIONS]->store(
+ props.second->compression_options.c_str(),
+ props.second->compression_options.size(), system_charset_info);
+ }
/* Tell MySQL about this row in the virtual table */
ret = static_cast<int>(
@@ -1289,7 +1318,7 @@ enum {
ENTRY_OTHERS,
DISTINCT_KEYS_PREFIX
};
-} // namespace RDB_INDEX_FILE_MAP_FIELD
+} // namespace RDB_INDEX_FILE_MAP_FIELD
static ST_FIELD_INFO rdb_i_s_index_file_map_fields_info[] = {
/* The information_schema.rocksdb_index_file_map virtual table has four
@@ -1443,7 +1472,7 @@ static int rdb_i_s_index_file_map_init(void *const p) {
*/
namespace RDB_LOCKS_FIELD {
enum { COLUMN_FAMILY_ID = 0, TRANSACTION_ID, KEY, MODE };
-} // namespace RDB_LOCKS_FIELD
+} // namespace RDB_LOCKS_FIELD
static ST_FIELD_INFO rdb_i_s_lock_info_fields_info[] = {
ROCKSDB_FIELD_INFO("COLUMN_FAMILY_ID", sizeof(uint32_t), MYSQL_TYPE_LONG,
@@ -1545,7 +1574,7 @@ enum {
THREAD_ID,
QUERY
};
-} // namespace RDB_TRX_FIELD
+} // namespace RDB_TRX_FIELD
static ST_FIELD_INFO rdb_i_s_trx_info_fields_info[] = {
ROCKSDB_FIELD_INFO("TRANSACTION_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
@@ -1672,7 +1701,7 @@ enum {
TABLE_NAME,
ROLLED_BACK,
};
-} // namespace RDB_TRX_FIELD
+} // namespace RDB_DEADLOCK_FIELD
static ST_FIELD_INFO rdb_i_s_deadlock_info_fields_info[] = {
ROCKSDB_FIELD_INFO("DEADLOCK_ID", sizeof(ulonglong), MYSQL_TYPE_LONGLONG,
@@ -1988,4 +2017,4 @@ struct st_maria_plugin rdb_i_s_deadlock_info = {
nullptr, /* config options */
MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.h b/storage/rocksdb/rdb_i_s.h
index a0783f7b8c0..6001742d984 100644
--- a/storage/rocksdb/rdb_i_s.h
+++ b/storage/rocksdb/rdb_i_s.h
@@ -34,4 +34,4 @@ extern struct st_maria_plugin rdb_i_s_index_file_map;
extern struct st_maria_plugin rdb_i_s_lock_info;
extern struct st_maria_plugin rdb_i_s_trx_info;
extern struct st_maria_plugin rdb_i_s_deadlock_info;
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_index_merge.cc b/storage/rocksdb/rdb_index_merge.cc
index f1245a205b5..424a998548a 100644
--- a/storage/rocksdb/rdb_index_merge.cc
+++ b/storage/rocksdb/rdb_index_merge.cc
@@ -29,14 +29,17 @@
namespace myrocks {
Rdb_index_merge::Rdb_index_merge(const char *const tmpfile_path,
- const ulonglong &merge_buf_size,
- const ulonglong &merge_combine_read_size,
- const ulonglong &merge_tmp_file_removal_delay,
+ const ulonglong merge_buf_size,
+ const ulonglong merge_combine_read_size,
+ const ulonglong merge_tmp_file_removal_delay,
rocksdb::ColumnFamilyHandle *cf)
- : m_tmpfile_path(tmpfile_path), m_merge_buf_size(merge_buf_size),
+ : m_tmpfile_path(tmpfile_path),
+ m_merge_buf_size(merge_buf_size),
m_merge_combine_read_size(merge_combine_read_size),
m_merge_tmp_file_removal_delay(merge_tmp_file_removal_delay),
- m_cf_handle(cf), m_rec_buf_unsorted(nullptr), m_output_buf(nullptr) {}
+ m_cf_handle(cf),
+ m_rec_buf_unsorted(nullptr),
+ m_output_buf(nullptr) {}
Rdb_index_merge::~Rdb_index_merge() {
/*
@@ -152,8 +155,9 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
*/
if (m_offset_tree.empty()) {
// NO_LINT_DEBUG
- sql_print_error("Sort buffer size is too small to process merge. "
- "Please set merge buffer size to a higher value.");
+ sql_print_error(
+ "Sort buffer size is too small to process merge. "
+ "Please set merge buffer size to a higher value.");
return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
@@ -623,4 +627,4 @@ void Rdb_index_merge::merge_reset() {
}
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_index_merge.h b/storage/rocksdb/rdb_index_merge.h
index e70923bbb0e..756b99ca4f1 100644
--- a/storage/rocksdb/rdb_index_merge.h
+++ b/storage/rocksdb/rdb_index_merge.h
@@ -61,7 +61,7 @@ class Rdb_index_merge {
/* heap memory allocated for main memory sort/merge */
std::unique_ptr<uchar[]> m_block;
const ulonglong
- m_block_len; /* amount of data bytes allocated for block above */
+ m_block_len; /* amount of data bytes allocated for block above */
ulonglong m_curr_offset; /* offset of the record pointer for the block */
ulonglong m_disk_start_offset; /* where the chunk starts on disk */
ulonglong m_disk_curr_offset; /* current offset on disk */
@@ -87,8 +87,11 @@ class Rdb_index_merge {
}
explicit merge_buf_info(const ulonglong merge_block_size)
- : m_block(nullptr), m_block_len(merge_block_size), m_curr_offset(0),
- m_disk_start_offset(0), m_disk_curr_offset(0),
+ : m_block(nullptr),
+ m_block_len(merge_block_size),
+ m_curr_offset(0),
+ m_disk_start_offset(0),
+ m_disk_curr_offset(0),
m_total_size(merge_block_size) {
/* Will throw an exception if it runs out of memory here */
m_block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
@@ -189,9 +192,9 @@ class Rdb_index_merge {
public:
Rdb_index_merge(const char *const tmpfile_path,
- const ulonglong &merge_buf_size,
- const ulonglong &merge_combine_read_size,
- const ulonglong &merge_tmp_file_removal_delay,
+ const ulonglong merge_buf_size,
+ const ulonglong merge_combine_read_size,
+ const ulonglong merge_tmp_file_removal_delay,
rocksdb::ColumnFamilyHandle *cf);
~Rdb_index_merge();
diff --git a/storage/rocksdb/rdb_io_watchdog.cc b/storage/rocksdb/rdb_io_watchdog.cc
index 5b809dbf553..07834118db0 100644
--- a/storage/rocksdb/rdb_io_watchdog.cc
+++ b/storage/rocksdb/rdb_io_watchdog.cc
@@ -40,10 +40,11 @@ void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
// At this point we know that I/O has been stuck in `write()` for more than
// `m_write_timeout` seconds. We'll log a message and shut down the service.
// NO_LINT_DEBUG
- sql_print_error("MyRocks has detected a combination of I/O requests which "
- "have cumulatively been blocking for more than %u seconds. "
- "Shutting the service down.",
- m_write_timeout);
+ sql_print_error(
+ "MyRocks has detected a combination of I/O requests which "
+ "have cumulatively been blocking for more than %u seconds. "
+ "Shutting the service down.",
+ m_write_timeout);
abort();
}
@@ -151,7 +152,7 @@ int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
return HA_EXIT_SUCCESS;
}
-int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) {
+int Rdb_io_watchdog::reset_timeout(const uint32_t write_timeout) {
// This function will be called either from a thread initializing MyRocks
// engine or handling system variable changes. We need to account for the
// possibility of I/O callback executing at the same time. If that happens
diff --git a/storage/rocksdb/rdb_io_watchdog.h b/storage/rocksdb/rdb_io_watchdog.h
index 9c391eee3f3..8ee5b1f6c93 100644
--- a/storage/rocksdb/rdb_io_watchdog.h
+++ b/storage/rocksdb/rdb_io_watchdog.h
@@ -17,12 +17,12 @@
#pragma once
/* C++ standard header files */
-#include <atomic>
#include <signal.h>
#include <stdlib.h>
-#include <string>
#include <string.h>
#include <time.h>
+#include <atomic>
+#include <string>
#include <vector>
/* MySQL header files */
@@ -92,9 +92,11 @@ class Rdb_io_watchdog {
}
public:
- explicit Rdb_io_watchdog(const std::vector<std::string> &directories)
- : m_io_check_timer(nullptr), m_io_check_watchdog_timer(nullptr),
- m_io_in_progress(false), m_dirs_to_check(std::move(directories)),
+ explicit Rdb_io_watchdog(std::vector<std::string> &&directories)
+ : m_io_check_timer(nullptr),
+ m_io_check_watchdog_timer(nullptr),
+ m_io_in_progress(false),
+ m_dirs_to_check(std::move(directories)),
m_buf(nullptr) {
DBUG_ASSERT(m_dirs_to_check.size() > 0);
mysql_mutex_init(0, &m_reset_mutex, MY_MUTEX_INIT_FAST);
@@ -107,7 +109,7 @@ class Rdb_io_watchdog {
free(m_buf);
}
- int reset_timeout(const uint32_t &write_timeout);
+ int reset_timeout(const uint32_t write_timeout);
Rdb_io_watchdog(const Rdb_io_watchdog &) = delete;
Rdb_io_watchdog &operator=(const Rdb_io_watchdog &) = delete;
diff --git a/storage/rocksdb/rdb_mariadb_server_port.cc b/storage/rocksdb/rdb_mariadb_server_port.cc
index 84a800807eb..f63e4bb36ad 100644
--- a/storage/rocksdb/rdb_mariadb_server_port.cc
+++ b/storage/rocksdb/rdb_mariadb_server_port.cc
@@ -10,9 +10,7 @@
#include "./log.h"
#include <mysys_err.h>
#include <mysql/psi/mysql_table.h>
-#ifdef MARIAROCKS_NOT_YET
-#include <mysql/thread_pool_priv.h>
-#endif
+//#include <mysql/thread_pool_priv.h>
#include <string>
diff --git a/storage/rocksdb/rdb_mutex_wrapper.cc b/storage/rocksdb/rdb_mutex_wrapper.cc
index 5d58b5709d0..2cc0bac41f9 100644
--- a/storage/rocksdb/rdb_mutex_wrapper.cc
+++ b/storage/rocksdb/rdb_mutex_wrapper.cc
@@ -67,9 +67,9 @@ Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) {
thd_killed() to determine which occurred)
*/
-Status
-Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg,
- int64_t timeout_micros) {
+Status Rdb_cond_var::WaitFor(
+ const std::shared_ptr<TransactionDBMutex> mutex_arg,
+ int64_t timeout_micros) {
auto *mutex_obj = reinterpret_cast<Rdb_mutex *>(mutex_arg.get());
DBUG_ASSERT(mutex_obj != nullptr);
@@ -78,8 +78,7 @@ Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg,
int res = 0;
struct timespec wait_timeout;
- if (timeout_micros < 0)
- timeout_micros = ONE_YEAR_IN_MICROSECS;
+ if (timeout_micros < 0) timeout_micros = ONE_YEAR_IN_MICROSECS;
set_timespec_nsec(wait_timeout, timeout_micros * 1000);
#ifndef STANDALONE_UNITTEST
@@ -108,15 +107,15 @@ Rdb_cond_var::WaitFor(const std::shared_ptr<TransactionDBMutex> mutex_arg,
res = mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout);
#ifndef STANDALONE_UNITTEST
- if (current_thd)
- killed= thd_killed(current_thd);
+ if (current_thd) killed = thd_killed(current_thd);
#endif
} while (!killed && res == EINTR);
- if (res || killed)
+ if (res || killed) {
return Status::TimedOut();
- else
+ } else {
return Status::OK();
+ }
}
/*
@@ -212,4 +211,4 @@ void Rdb_mutex::UnLock() {
RDB_MUTEX_UNLOCK_CHECK(m_mutex);
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_mutex_wrapper.h b/storage/rocksdb/rdb_mutex_wrapper.h
index 567e81e5ef6..33eefe9d50c 100644
--- a/storage/rocksdb/rdb_mutex_wrapper.h
+++ b/storage/rocksdb/rdb_mutex_wrapper.h
@@ -37,9 +37,9 @@ class Rdb_mutex : public rocksdb::TransactionDBMutex {
Rdb_mutex(const Rdb_mutex &p) = delete;
Rdb_mutex &operator=(const Rdb_mutex &p) = delete;
-public:
+ public:
Rdb_mutex();
- virtual ~Rdb_mutex();
+ virtual ~Rdb_mutex() override;
/*
Override parent class's virtual methods of interrest.
@@ -55,13 +55,13 @@ public:
// TimedOut if timed out,
// or other Status on failure.
// If returned status is OK, TransactionDB will eventually call UnLock().
- virtual rocksdb::Status
- TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) override;
+ virtual rocksdb::Status TryLockFor(
+ int64_t timeout_time MY_ATTRIBUTE((__unused__))) override;
// Unlock Mutex that was successfully locked by Lock() or TryLockUntil()
virtual void UnLock() override;
-private:
+ private:
mysql_mutex_t m_mutex;
friend class Rdb_cond_var;
@@ -75,9 +75,9 @@ class Rdb_cond_var : public rocksdb::TransactionDBCondVar {
Rdb_cond_var(const Rdb_cond_var &) = delete;
Rdb_cond_var &operator=(const Rdb_cond_var &) = delete;
-public:
+ public:
Rdb_cond_var();
- virtual ~Rdb_cond_var();
+ virtual ~Rdb_cond_var() override;
/*
Override parent class's virtual methods of interrest.
@@ -88,8 +88,8 @@ public:
// Returns OK if notified.
// Returns non-OK if TransactionDB should stop waiting and fail the operation.
// May return OK spuriously even if not notified.
- virtual rocksdb::Status
- Wait(const std::shared_ptr<rocksdb::TransactionDBMutex> mutex) override;
+ virtual rocksdb::Status Wait(
+ const std::shared_ptr<rocksdb::TransactionDBMutex> mutex) override;
// Block current thread until condition variable is notifiesd by a call to
// Notify() or NotifyAll(), or if the timeout is reached.
@@ -103,9 +103,9 @@ public:
// Returns other status if TransactionDB should otherwis stop waiting and
// fail the operation.
// May return OK spuriously even if not notified.
- virtual rocksdb::Status
- WaitFor(const std::shared_ptr<rocksdb::TransactionDBMutex> mutex,
- int64_t timeout_time) override;
+ virtual rocksdb::Status WaitFor(
+ const std::shared_ptr<rocksdb::TransactionDBMutex> mutex,
+ int64_t timeout_time) override;
// If any threads are waiting on *this, unblock at least one of the
// waiting threads.
@@ -114,12 +114,12 @@ public:
// Unblocks all threads waiting on *this.
virtual void NotifyAll() override;
-private:
+ private:
mysql_cond_t m_cond;
};
class Rdb_mutex_factory : public rocksdb::TransactionDBMutexFactory {
-public:
+ public:
Rdb_mutex_factory(const Rdb_mutex_factory &) = delete;
Rdb_mutex_factory &operator=(const Rdb_mutex_factory &) = delete;
Rdb_mutex_factory() {}
@@ -127,17 +127,17 @@ public:
Override parent class's virtual methods of interrest.
*/
- virtual std::shared_ptr<rocksdb::TransactionDBMutex>
- AllocateMutex() override {
+ virtual std::shared_ptr<rocksdb::TransactionDBMutex> AllocateMutex()
+ override {
return std::make_shared<Rdb_mutex>();
}
- virtual std::shared_ptr<rocksdb::TransactionDBCondVar>
- AllocateCondVar() override {
+ virtual std::shared_ptr<rocksdb::TransactionDBCondVar> AllocateCondVar()
+ override {
return std::make_shared<Rdb_cond_var>();
}
- virtual ~Rdb_mutex_factory() {}
+ virtual ~Rdb_mutex_factory() override {}
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc
index 34584484dd2..d7a126d86e4 100644
--- a/storage/rocksdb/rdb_perf_context.cc
+++ b/storage/rocksdb/rdb_perf_context.cc
@@ -98,17 +98,19 @@ std::string rdb_pc_stat_types[] = {
"IO_RANGE_SYNC_NANOS",
"IO_LOGGER_NANOS"};
-#define IO_PERF_RECORD(_field_) \
- do { \
- if (rocksdb::get_perf_context()->_field_ > 0) \
- counters->m_value[idx] += rocksdb::get_perf_context()->_field_; \
- idx++; \
+#define IO_PERF_RECORD(_field_) \
+ do { \
+ if (rocksdb::get_perf_context()->_field_ > 0) { \
+ counters->m_value[idx] += rocksdb::get_perf_context()->_field_; \
+ } \
+ idx++; \
} while (0)
-#define IO_STAT_RECORD(_field_) \
- do { \
- if (rocksdb::get_iostats_context()->_field_ > 0) \
- counters->m_value[idx] += rocksdb::get_iostats_context()->_field_; \
- idx++; \
+#define IO_STAT_RECORD(_field_) \
+ do { \
+ if (rocksdb::get_iostats_context()->_field_ > 0) { \
+ counters->m_value[idx] += rocksdb::get_iostats_context()->_field_; \
+ } \
+ idx++; \
} while (0)
static void harvest_diffs(Rdb_atomic_perf_counters *const counters) {
@@ -280,4 +282,4 @@ void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
#endif
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h
index 036c497c2f5..d8381b8ea94 100644
--- a/storage/rocksdb/rdb_perf_context.h
+++ b/storage/rocksdb/rdb_perf_context.h
@@ -22,8 +22,8 @@
#include <string>
/* MySQL header files */
-#include "./handler.h"
#include <my_global.h>
+#include "./handler.h"
#include "rdb_mariadb_port.h"
@@ -108,7 +108,7 @@ class Rdb_perf_counters {
Rdb_perf_counters(const Rdb_perf_counters &) = delete;
Rdb_perf_counters &operator=(const Rdb_perf_counters &) = delete;
-public:
+ public:
Rdb_perf_counters() = default;
uint64_t m_value[PC_MAX_IDX];
@@ -158,8 +158,11 @@ class Rdb_io_perf {
void end_and_record(const uint32_t perf_context_level);
explicit Rdb_io_perf()
- : m_atomic_counters(nullptr), m_shared_io_perf_read(nullptr),
- m_stats(nullptr), io_write_bytes(0), io_write_requests(0) {}
+ : m_atomic_counters(nullptr),
+ m_shared_io_perf_read(nullptr),
+ m_stats(nullptr),
+ io_write_bytes(0),
+ io_write_requests(0) {}
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_psi.cc b/storage/rocksdb/rdb_psi.cc
index 361a648bba4..77003b1bb48 100644
--- a/storage/rocksdb/rdb_psi.cc
+++ b/storage/rocksdb/rdb_psi.cc
@@ -14,7 +14,7 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
+#pragma implementation // gcc: Class implementation
#endif
#define MYSQL_SERVER 1
@@ -22,9 +22,6 @@
/* The C++ file's header */
#include "./rdb_psi.h"
-/* MySQL header files */
-#include <mysql/psi/mysql_stage.h>
-
namespace myrocks {
/*
@@ -94,17 +91,14 @@ void init_rocksdb_psi_keys() {
const char *const category = "rocksdb";
int count;
- if (PSI_server == nullptr)
- return;
-
count = array_elements(all_rocksdb_mutexes);
- PSI_server->register_mutex(category, all_rocksdb_mutexes, count);
+ mysql_mutex_register(category, all_rocksdb_mutexes, count);
count = array_elements(all_rocksdb_rwlocks);
- PSI_server->register_rwlock(category, all_rocksdb_rwlocks, count);
+ mysql_rwlock_register(category, all_rocksdb_rwlocks, count);
count = array_elements(all_rocksdb_conds);
- //TODO Disabling PFS for conditions due to the bug
+ // TODO(jay) Disabling PFS for conditions due to the bug
// https://github.com/MySQLOnRocksDB/mysql-5.6/issues/92
// PSI_server->register_cond(category, all_rocksdb_conds, count);
@@ -114,7 +108,7 @@ void init_rocksdb_psi_keys() {
count = array_elements(all_rocksdb_threads);
mysql_thread_register(category, all_rocksdb_threads, count);
}
-#else // HAVE_PSI_INTERFACE
+#else // HAVE_PSI_INTERFACE
void init_rocksdb_psi_keys() {}
#endif // HAVE_PSI_INTERFACE
diff --git a/storage/rocksdb/rdb_psi.h b/storage/rocksdb/rdb_psi.h
index e0d6e7e3a47..2703837a156 100644
--- a/storage/rocksdb/rdb_psi.h
+++ b/storage/rocksdb/rdb_psi.h
@@ -20,7 +20,8 @@
/* MySQL header files */
#include <my_global.h>
#include <my_pthread.h>
-#include <mysql/psi/psi.h>
+
+#include <mysql/psi/mysql_stage.h>
/* MyRocks header files */
#include "./rdb_utils.h"
diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc
index 41cd3c51307..8b7886667eb 100644
--- a/storage/rocksdb/rdb_sst_info.cc
+++ b/storage/rocksdb/rdb_sst_info.cc
@@ -51,8 +51,13 @@ Rdb_sst_file_ordered::Rdb_sst_file::Rdb_sst_file(
rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
const rocksdb::DBOptions &db_options, const std::string &name,
const bool tracing)
- : m_db(db), m_cf(cf), m_db_options(db_options), m_sst_file_writer(nullptr),
- m_name(name), m_tracing(tracing), m_comparator(cf->GetComparator()) {
+ : m_db(db),
+ m_cf(cf),
+ m_db_options(db_options),
+ m_sst_file_writer(nullptr),
+ m_name(name),
+ m_tracing(tracing),
+ m_comparator(cf->GetComparator()) {
DBUG_ASSERT(db != nullptr);
DBUG_ASSERT(cf != nullptr);
}
@@ -61,11 +66,6 @@ Rdb_sst_file_ordered::Rdb_sst_file::~Rdb_sst_file() {
// Make sure we clean up
delete m_sst_file_writer;
m_sst_file_writer = nullptr;
-
- // In case something went wrong attempt to delete the temporary file.
- // If everything went fine that file will have been renamed and this
- // function call will fail.
- std::remove(m_name.c_str());
}
rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::open() {
@@ -102,9 +102,8 @@ rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::open() {
return s;
}
-rocksdb::Status
-Rdb_sst_file_ordered::Rdb_sst_file::put(const rocksdb::Slice &key,
- const rocksdb::Slice &value) {
+rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::put(
+ const rocksdb::Slice &key, const rocksdb::Slice &value) {
DBUG_ASSERT(m_sst_file_writer != nullptr);
#ifdef __GNUC__
@@ -115,8 +114,8 @@ Rdb_sst_file_ordered::Rdb_sst_file::put(const rocksdb::Slice &key,
return m_sst_file_writer->Add(key, value);
}
-std::string
-Rdb_sst_file_ordered::Rdb_sst_file::generateKey(const std::string &key) {
+std::string Rdb_sst_file_ordered::Rdb_sst_file::generateKey(
+ const std::string &key) {
static char const hexdigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
@@ -137,7 +136,7 @@ rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::commit() {
DBUG_ASSERT(m_sst_file_writer != nullptr);
rocksdb::Status s;
- rocksdb::ExternalSstFileInfo fileinfo; /// Finish may should be modified
+ rocksdb::ExternalSstFileInfo fileinfo; /// Finish may should be modified
// Close out the sst file
s = m_sst_file_writer->Finish(&fileinfo);
@@ -150,30 +149,15 @@ rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::commit() {
if (s.ok()) {
if (m_tracing) {
// NO_LINT_DEBUG
- sql_print_information("SST Tracing: Adding file %s, smallest key: %s, "
- "largest key: %s, file size: %" PRIu64 ", "
- "num_entries: %" PRIu64,
- fileinfo.file_path.c_str(),
- generateKey(fileinfo.smallest_key).c_str(),
- generateKey(fileinfo.largest_key).c_str(),
- fileinfo.file_size, fileinfo.num_entries);
- }
-
- // Add the file to the database
- // Set the snapshot_consistency parameter to false since no one
- // should be accessing the table we are bulk loading
- rocksdb::IngestExternalFileOptions opts;
- opts.move_files = true;
- opts.snapshot_consistency = false;
- opts.allow_global_seqno = false;
- opts.allow_blocking_flush = false;
- s = m_db->IngestExternalFile(m_cf, {m_name}, opts);
-
- if (m_tracing) {
- // NO_LINT_DEBUG
- sql_print_information("SST Tracing: AddFile(%s) returned %s",
- fileinfo.file_path.c_str(),
- s.ok() ? "ok" : "not ok");
+ sql_print_information(
+ "SST Tracing: Adding file %s, smallest key: %s, "
+ "largest key: %s, file size: %" PRIu64
+ ", "
+ "num_entries: %" PRIu64,
+ fileinfo.file_path.c_str(),
+ generateKey(fileinfo.smallest_key).c_str(),
+ generateKey(fileinfo.largest_key).c_str(), fileinfo.file_size,
+ fileinfo.num_entries);
}
}
@@ -219,7 +203,9 @@ Rdb_sst_file_ordered::Rdb_sst_file_ordered(
rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
const rocksdb::DBOptions &db_options, const std::string &name,
const bool tracing, size_t max_size)
- : m_use_stack(false), m_first(true), m_stack(max_size),
+ : m_use_stack(false),
+ m_first(true),
+ m_stack(max_size),
m_file(db, cf, db_options, name, tracing) {
m_stack.reset();
}
@@ -326,21 +312,26 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
const std::string &indexname,
rocksdb::ColumnFamilyHandle *const cf,
const rocksdb::DBOptions &db_options,
- const bool &tracing)
- : m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0),
- m_sst_count(0), m_background_error(HA_EXIT_SUCCESS), m_committed(false),
-#if defined(RDB_SST_INFO_USE_THREAD)
- m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false),
-#endif
- m_sst_file(nullptr), m_tracing(tracing), m_print_client_error(true) {
+ const bool tracing)
+ : m_db(db),
+ m_cf(cf),
+ m_db_options(db_options),
+ m_curr_size(0),
+ m_sst_count(0),
+ m_background_error(HA_EXIT_SUCCESS),
+ m_done(false),
+ m_sst_file(nullptr),
+ m_tracing(tracing),
+ m_print_client_error(true) {
m_prefix = db->GetName() + "/";
std::string normalized_table;
if (rdb_normalize_tablename(tablename.c_str(), &normalized_table)) {
// We failed to get a normalized table name. This should never happen,
// but handle it anyway.
- m_prefix += "fallback_" + std::to_string(reinterpret_cast<intptr_t>(
- reinterpret_cast<void *>(this))) +
+ m_prefix += "fallback_" +
+ std::to_string(reinterpret_cast<intptr_t>(
+ reinterpret_cast<void *>(this))) +
"_" + indexname + "_";
} else {
m_prefix += normalized_table + "_" + indexname + "_";
@@ -364,9 +355,15 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
Rdb_sst_info::~Rdb_sst_info() {
DBUG_ASSERT(m_sst_file == nullptr);
-#if defined(RDB_SST_INFO_USE_THREAD)
- DBUG_ASSERT(m_thread == nullptr);
-#endif
+
+ for (auto sst_file : m_committed_files) {
+ // In case something went wrong attempt to delete the temporary file.
+ // If everything went fine that file will have been renamed and this
+ // function call will fail.
+ std::remove(sst_file.c_str());
+ }
+ m_committed_files.clear();
+
mysql_mutex_destroy(&m_commit_mutex);
}
@@ -377,8 +374,8 @@ int Rdb_sst_info::open_new_sst_file() {
const std::string name = m_prefix + std::to_string(m_sst_count++) + m_suffix;
// Create the new sst file object
- m_sst_file = new Rdb_sst_file_ordered(m_db, m_cf, m_db_options,
- name, m_tracing, m_max_size);
+ m_sst_file = new Rdb_sst_file_ordered(m_db, m_cf, m_db_options, name,
+ m_tracing, m_max_size);
// Open the sst file
const rocksdb::Status s = m_sst_file->open();
@@ -394,35 +391,23 @@ int Rdb_sst_info::open_new_sst_file() {
return HA_EXIT_SUCCESS;
}
-void Rdb_sst_info::close_curr_sst_file() {
- DBUG_ASSERT(m_sst_file != nullptr);
- DBUG_ASSERT(m_curr_size > 0);
-
-#if defined(RDB_SST_INFO_USE_THREAD)
- if (m_thread == nullptr) {
- // We haven't already started a background thread, so start one
- m_thread = new std::thread(thread_fcn, this);
+void Rdb_sst_info::commit_sst_file(Rdb_sst_file_ordered *sst_file) {
+ const rocksdb::Status s = sst_file->commit();
+ if (!s.ok()) {
+ set_error_msg(sst_file->get_name(), s);
+ set_background_error(HA_ERR_ROCKSDB_BULK_LOAD);
}
- DBUG_ASSERT(m_thread != nullptr);
+ m_committed_files.push_back(sst_file->get_name());
- {
- // Add this finished sst file to the queue (while holding mutex)
- const std::lock_guard<std::mutex> guard(m_mutex);
- m_queue.push(m_sst_file);
- }
+ delete sst_file;
+}
- // Notify the background thread that there is a new entry in the queue
- m_cond.notify_one();
-#else
- const rocksdb::Status s = m_sst_file->commit();
- if (!s.ok()) {
- set_error_msg(m_sst_file->get_name(), s);
- set_background_error(HA_ERR_ROCKSDB_BULK_LOAD);
- }
+void Rdb_sst_info::close_curr_sst_file() {
+ DBUG_ASSERT(m_sst_file != nullptr);
+ DBUG_ASSERT(m_curr_size > 0);
- delete m_sst_file;
-#endif
+ commit_sst_file(m_sst_file);
// Reset for next sst file
m_sst_file = nullptr;
@@ -432,7 +417,7 @@ void Rdb_sst_info::close_curr_sst_file() {
int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
int rc;
- DBUG_ASSERT(!m_committed);
+ DBUG_ASSERT(!m_done);
if (m_curr_size + key.size() + value.size() >= m_max_size) {
// The current sst file has reached its maximum, close it out
@@ -467,15 +452,22 @@ int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
return HA_EXIT_SUCCESS;
}
-int Rdb_sst_info::commit(bool print_client_error) {
+/*
+ Finish the current work and return the list of SST files ready to be
+  ingested. This function needs to be idempotent and atomic
+ */
+int Rdb_sst_info::finish(Rdb_sst_commit_info *commit_info,
+ bool print_client_error) {
int ret = HA_EXIT_SUCCESS;
// Both the transaction clean up and the ha_rocksdb handler have
// references to this Rdb_sst_info and both can call commit, so
// synchronize on the object here.
+  // This also means in such a case the bulk loading operation stops being truly
+ // atomic, and we should consider fixing this in the future
RDB_MUTEX_LOCK_CHECK(m_commit_mutex);
- if (m_committed) {
+ if (is_done()) {
RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
return ret;
}
@@ -487,20 +479,13 @@ int Rdb_sst_info::commit(bool print_client_error) {
close_curr_sst_file();
}
-#if defined(RDB_SST_INFO_USE_THREAD)
- if (m_thread != nullptr) {
- // Tell the background thread we are done
- m_finished = true;
- m_cond.notify_one();
+ // This checks out the list of files so that the caller can collect/group
+ // them and ingest them all in one go, and any racing calls to commit
+ // won't see them at all
+ commit_info->init(m_cf, std::move(m_committed_files));
+ DBUG_ASSERT(m_committed_files.size() == 0);
- // Wait for the background thread to finish
- m_thread->join();
- delete m_thread;
- m_thread = nullptr;
- }
-#endif
-
- m_committed = true;
+ m_done = true;
RDB_MUTEX_UNLOCK_CHECK(m_commit_mutex);
// Did we get any errors?
@@ -514,16 +499,13 @@ int Rdb_sst_info::commit(bool print_client_error) {
void Rdb_sst_info::set_error_msg(const std::string &sst_file_name,
const rocksdb::Status &s) {
+ if (!m_print_client_error) return;
- if (!m_print_client_error)
- return;
+ report_error_msg(s, sst_file_name.c_str());
+}
-#if defined(RDB_SST_INFO_USE_THREAD)
- // Both the foreground and background threads can set the error message
- // so lock the mutex to protect it. We only want the first error that
- // we encounter.
- const std::lock_guard<std::mutex> guard(m_mutex);
-#endif
+void Rdb_sst_info::report_error_msg(const rocksdb::Status &s,
+ const char *sst_file_name) {
if (s.IsInvalidArgument() &&
strcmp(s.getState(), "Keys must be added in order") == 0) {
my_printf_error(ER_KEYS_OUT_OF_ORDER,
@@ -533,57 +515,16 @@ void Rdb_sst_info::set_error_msg(const std::string &sst_file_name,
} else if (s.IsInvalidArgument() &&
strcmp(s.getState(), "Global seqno is required, but disabled") ==
0) {
- my_printf_error(ER_OVERLAPPING_KEYS, "Rows inserted during bulk load "
- "must not overlap existing rows",
+ my_printf_error(ER_OVERLAPPING_KEYS,
+ "Rows inserted during bulk load "
+ "must not overlap existing rows",
MYF(0));
} else {
my_printf_error(ER_UNKNOWN_ERROR, "[%s] bulk load error: %s", MYF(0),
- sst_file_name.c_str(), s.ToString().c_str());
+ sst_file_name, s.ToString().c_str());
}
}
-#if defined(RDB_SST_INFO_USE_THREAD)
-// Static thread function - the Rdb_sst_info object is in 'object'
-void Rdb_sst_info::thread_fcn(void *object) {
- reinterpret_cast<Rdb_sst_info *>(object)->run_thread();
-}
-
-void Rdb_sst_info::run_thread() {
- std::unique_lock<std::mutex> lk(m_mutex);
-
- do {
- // Wait for notification or 1 second to pass
- m_cond.wait_for(lk, std::chrono::seconds(1));
-
- // Inner loop pulls off all Rdb_sst_file_ordered entries and processes them
- while (!m_queue.empty()) {
- Rdb_sst_file_ordered *const sst_file = m_queue.front();
- m_queue.pop();
-
- // Release the lock - we don't want to hold it while committing the file
- lk.unlock();
-
- // Close out the sst file and add it to the database
- const rocksdb::Status s = sst_file->commit();
- if (!s.ok()) {
- set_error_msg(sst_file->get_name(), s);
- set_background_error(HA_ERR_ROCKSDB_BULK_LOAD);
- }
-
- delete sst_file;
-
- // Reacquire the lock for the next inner loop iteration
- lk.lock();
- }
-
- // If the queue is empty and the main thread has indicated we should exit
- // break out of the loop.
- } while (!m_finished);
-
- DBUG_ASSERT(m_queue.empty());
-}
-#endif
-
void Rdb_sst_info::init(const rocksdb::DB *const db) {
const std::string path = db->GetName() + FN_DIRSEP;
struct st_my_dir *const dir_info = my_dir(path.c_str(), MYF(MY_DONT_SORT));
@@ -615,4 +556,4 @@ void Rdb_sst_info::init(const rocksdb::DB *const db) {
std::atomic<uint64_t> Rdb_sst_info::m_prefix_counter(0);
std::string Rdb_sst_info::m_suffix = ".bulk_load.tmp";
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h
index f50645b1eeb..66da3b7c1e7 100644
--- a/storage/rocksdb/rdb_sst_info.h
+++ b/storage/rocksdb/rdb_sst_info.h
@@ -34,8 +34,6 @@
/* MyRocks header files */
#include "./rdb_utils.h"
-// #define RDB_SST_INFO_USE_THREAD /* uncomment to use threads */
-
namespace myrocks {
class Rdb_sst_file_ordered {
@@ -125,43 +123,114 @@ class Rdb_sst_info {
uint64_t m_max_size;
uint32_t m_sst_count;
std::atomic<int> m_background_error;
+ bool m_done;
std::string m_prefix;
static std::atomic<uint64_t> m_prefix_counter;
static std::string m_suffix;
- bool m_committed;
mysql_mutex_t m_commit_mutex;
-#if defined(RDB_SST_INFO_USE_THREAD)
- std::queue<Rdb_sst_file_ordered *> m_queue;
- std::mutex m_mutex;
- std::condition_variable m_cond;
- std::thread *m_thread;
- bool m_finished;
-#endif
Rdb_sst_file_ordered *m_sst_file;
+
+ // List of committed SST files - we'll ingest them later in one single batch
+ std::vector<std::string> m_committed_files;
+
const bool m_tracing;
bool m_print_client_error;
int open_new_sst_file();
void close_curr_sst_file();
+ void commit_sst_file(Rdb_sst_file_ordered *sst_file);
+
void set_error_msg(const std::string &sst_file_name,
const rocksdb::Status &s);
-#if defined(RDB_SST_INFO_USE_THREAD)
- void run_thread();
-
- static void thread_fcn(void *object);
-#endif
-
public:
Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
const std::string &indexname,
rocksdb::ColumnFamilyHandle *const cf,
- const rocksdb::DBOptions &db_options, const bool &tracing);
+ const rocksdb::DBOptions &db_options, const bool tracing);
~Rdb_sst_info();
+ /*
+ This is the unit of work returned from Rdb_sst_info::finish and represents
+    a group of SST files to be ingested atomically with other Rdb_sst_commit_info.
+    This is always local to the complete bulk load operation so no locking
+ is required
+ */
+ class Rdb_sst_commit_info {
+ public:
+ Rdb_sst_commit_info() : m_committed(true), m_cf(nullptr) {}
+
+ Rdb_sst_commit_info(Rdb_sst_commit_info &&rhs) noexcept
+ : m_committed(rhs.m_committed),
+ m_cf(rhs.m_cf),
+ m_committed_files(std::move(rhs.m_committed_files)) {
+ rhs.m_committed = true;
+ rhs.m_cf = nullptr;
+ }
+
+ Rdb_sst_commit_info &operator=(Rdb_sst_commit_info &&rhs) noexcept {
+ reset();
+
+ m_cf = rhs.m_cf;
+ m_committed_files = std::move(rhs.m_committed_files);
+ m_committed = rhs.m_committed;
+
+ rhs.m_committed = true;
+ rhs.m_cf = nullptr;
+
+ return *this;
+ }
+
+ Rdb_sst_commit_info(const Rdb_sst_commit_info &) = delete;
+ Rdb_sst_commit_info &operator=(const Rdb_sst_commit_info &) = delete;
+
+ ~Rdb_sst_commit_info() { reset(); }
+
+ void reset() {
+ if (!m_committed) {
+ for (auto sst_file : m_committed_files) {
+ // In case something went wrong attempt to delete the temporary file.
+ // If everything went fine that file will have been renamed and this
+ // function call will fail.
+ std::remove(sst_file.c_str());
+ }
+ }
+ m_committed_files.clear();
+ m_cf = nullptr;
+ m_committed = true;
+ }
+
+ bool has_work() const {
+ return m_cf != nullptr && m_committed_files.size() > 0;
+ }
+
+ void init(rocksdb::ColumnFamilyHandle *cf,
+ std::vector<std::string> &&files) {
+ DBUG_ASSERT(m_cf == nullptr && m_committed_files.size() == 0 &&
+ m_committed);
+ m_cf = cf;
+ m_committed_files = std::move(files);
+ m_committed = false;
+ }
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf; }
+
+ const std::vector<std::string> &get_committed_files() const {
+ return m_committed_files;
+ }
+
+ void commit() { m_committed = true; }
+
+ private:
+ bool m_committed;
+ rocksdb::ColumnFamilyHandle *m_cf;
+ std::vector<std::string> m_committed_files;
+ };
+
int put(const rocksdb::Slice &key, const rocksdb::Slice &value);
- int commit(bool print_client_error = true);
- bool is_committed() const { return m_committed; }
+ int finish(Rdb_sst_commit_info *commit_info, bool print_client_error = true);
+
+ bool is_done() const { return m_done; }
bool have_background_error() { return m_background_error != 0; }
@@ -180,7 +249,17 @@ class Rdb_sst_info {
m_background_error.compare_exchange_strong(expected, code);
}
+  /** Return the list of committed files to be ingested later **/
+ const std::vector<std::string> &get_committed_files() {
+ return m_committed_files;
+ }
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf; }
+
static void init(const rocksdb::DB *const db);
+
+ static void report_error_msg(const rocksdb::Status &s,
+ const char *sst_file_name);
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_threads.cc b/storage/rocksdb/rdb_threads.cc
index 2214ce1a043..6f2377faff3 100644
--- a/storage/rocksdb/rdb_threads.cc
+++ b/storage/rocksdb/rdb_threads.cc
@@ -16,7 +16,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
+#pragma implementation // gcc: Class implementation
#endif
#include <my_config.h>
@@ -42,7 +42,7 @@ void Rdb_thread::init(
my_core::PSI_mutex_key stop_bg_psi_mutex_key,
my_core::PSI_cond_key stop_bg_psi_cond_key
#endif
- ) {
+) {
DBUG_ASSERT(!m_run_once);
mysql_mutex_init(stop_bg_psi_mutex_key, &m_signal_mutex, MY_MUTEX_INIT_FAST);
mysql_cond_init(stop_bg_psi_cond_key, &m_signal_cond, nullptr);
@@ -58,7 +58,7 @@ int Rdb_thread::create_thread(const std::string &thread_name
,
PSI_thread_key background_psi_thread_key
#endif
- ) {
+) {
// Make a copy of the name so we can return without worrying that the
// caller will free the memory
m_name = thread_name;
@@ -68,7 +68,7 @@ int Rdb_thread::create_thread(const std::string &thread_name
}
-void Rdb_thread::signal(const bool &stop_thread) {
+void Rdb_thread::signal(const bool stop_thread) {
RDB_MUTEX_LOCK_CHECK(m_signal_mutex);
if (stop_thread) {
@@ -80,4 +80,4 @@ void Rdb_thread::signal(const bool &stop_thread) {
RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex);
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_threads.h b/storage/rocksdb/rdb_threads.h
index b3331db1738..7d89fe0616b 100644
--- a/storage/rocksdb/rdb_threads.h
+++ b/storage/rocksdb/rdb_threads.h
@@ -34,9 +34,7 @@
#undef pthread_getspecific
#endif
#include <mysql/psi/mysql_table.h>
-#ifdef MARIAROCKS_NOT_YET
-#include <mysql/thread_pool_priv.h>
-#endif
+// #include <mysql/thread_pool_priv.h>
/* MyRocks header files */
#include "./rdb_utils.h"
@@ -45,7 +43,7 @@
namespace myrocks {
class Rdb_thread {
-private:
+ private:
// Disable Copying
Rdb_thread(const Rdb_thread &);
Rdb_thread &operator=(const Rdb_thread &);
@@ -57,12 +55,12 @@ private:
std::string m_name;
-protected:
+ protected:
mysql_mutex_t m_signal_mutex;
mysql_cond_t m_signal_cond;
bool m_stop = false;
-public:
+ public:
Rdb_thread() : m_run_once(false) {}
#ifdef HAVE_PSI_INTERFACE
@@ -77,7 +75,7 @@ public:
virtual void run(void) = 0;
- void signal(const bool &stop_thread = false);
+ void signal(const bool stop_thread = false);
int join()
{
@@ -116,8 +114,7 @@ public:
DBUG_ASSERT(!m_name.empty());
#ifdef __linux__
int err = pthread_setname_np(m_handle, m_name.c_str());
- if (err)
- {
+ if (err) {
// NO_LINT_DEBUG
sql_print_warning(
"MyRocks: Failed to set name (%s) for current thread, errno=%d,%d",
@@ -130,7 +127,7 @@ public:
virtual ~Rdb_thread() {}
-private:
+ private:
static void *thread_func(void *const thread_ptr);
};
@@ -141,7 +138,7 @@ private:
*/
class Rdb_background_thread : public Rdb_thread {
-private:
+ private:
bool m_save_stats = false;
void reset() {
@@ -150,7 +147,7 @@ private:
m_save_stats = false;
}
-public:
+ public:
virtual void run() override;
void request_save_stats() {
@@ -195,4 +192,4 @@ struct Rdb_drop_index_thread : public Rdb_thread {
virtual void run() override;
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc
index 9bc93dfecec..85eed64775c 100644
--- a/storage/rocksdb/rdb_utils.cc
+++ b/storage/rocksdb/rdb_utils.cc
@@ -21,9 +21,9 @@
/* C++ standard header files */
#include <array>
+#include <sstream>
#include <string>
#include <vector>
-#include <sstream>
/* C standard header files */
#include <ctype.h>
@@ -209,8 +209,8 @@ const char *rdb_skip_id(const struct charset_info_st *const cs,
/*
Parses a given string into tokens (if any) separated by a specific delimiter.
*/
-const std::vector<std::string> parse_into_tokens(
- const std::string& s, const char delim) {
+const std::vector<std::string> parse_into_tokens(const std::string &s,
+ const char delim) {
std::vector<std::string> tokens;
std::string t;
std::stringstream ss(s);
@@ -338,14 +338,18 @@ void rdb_persist_corruption_marker() {
/* O_SYNC is not supported on windows */
int fd = my_open(fileName.c_str(), O_CREAT | IF_WIN(0, O_SYNC), MYF(MY_WME));
if (fd < 0) {
- sql_print_error("RocksDB: Can't create file %s to mark rocksdb as "
- "corrupted.",
- fileName.c_str());
+ // NO_LINT_DEBUG
+ sql_print_error(
+ "RocksDB: Can't create file %s to mark rocksdb as "
+ "corrupted.",
+ fileName.c_str());
} else {
- sql_print_information("RocksDB: Creating the file %s to abort mysqld "
- "restarts. Remove this file from the data directory "
- "after fixing the corruption to recover. ",
- fileName.c_str());
+ // NO_LINT_DEBUG
+ sql_print_information(
+ "RocksDB: Creating the file %s to abort mysqld "
+ "restarts. Remove this file from the data directory "
+ "after fixing the corruption to recover. ",
+ fileName.c_str());
}
#ifdef _WIN32
@@ -362,4 +366,4 @@ void rdb_persist_corruption_marker() {
}
}
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h
index 44d90d78437..0ef74b9fd06 100644
--- a/storage/rocksdb/rdb_utils.h
+++ b/storage/rocksdb/rdb_utils.h
@@ -21,6 +21,7 @@
#include <chrono>
#include <string>
#include <vector>
+#include <functional>
/* MySQL header files */
#include "../sql/log.h"
@@ -44,7 +45,7 @@ namespace myrocks {
#ifndef interface
#define interface struct
-#endif // interface
+#endif // interface
/*
Introduce C-style pseudo-namespaces, a handy way to make code more readble
@@ -62,13 +63,13 @@ namespace myrocks {
// to non-obvious MySQL functions, like the ones that do not start with well
// known prefixes: "my_", "sql_", and "mysql_".
#define my_core
-#endif // my_core
+#endif // my_core
/*
The intent behind a SHIP_ASSERT() macro is to have a mechanism for validating
invariants in retail builds. Traditionally assertions (such as macros defined
in <cassert>) are evaluated for performance reasons only in debug builds and
- become NOOP in retail builds when NDEBUG is defined.
+ become NOOP in retail builds when DBUG_OFF is defined.
This macro is intended to validate the invariants which are critical for
making sure that data corruption and data loss won't take place. Proper
@@ -80,14 +81,14 @@ namespace myrocks {
*/
#ifndef SHIP_ASSERT
-#define SHIP_ASSERT(expr) \
- do { \
- if (!(expr)) { \
- my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \
- abort(); \
- } \
+#define SHIP_ASSERT(expr) \
+ do { \
+ if (!(expr)) { \
+ my_safe_printf_stderr("\nShip assert failure: \'%s\'\n", #expr); \
+ abort(); \
+ } \
} while (0)
-#endif // SHIP_ASSERT
+#endif // SHIP_ASSERT
/*
Assert a implies b.
@@ -103,7 +104,7 @@ namespace myrocks {
a and b must be both true or both false.
*/
#ifndef DBUG_ASSERT_IFF
-#define DBUG_ASSERT_IFF(a, b) \
+#define DBUG_ASSERT_IFF(a, b) \
DBUG_ASSERT(static_cast<bool>(a) == static_cast<bool>(b))
#endif
@@ -151,10 +152,10 @@ namespace myrocks {
Macros to better convey the intent behind checking the results from locking
and unlocking mutexes.
*/
-#define RDB_MUTEX_LOCK_CHECK(m) \
+#define RDB_MUTEX_LOCK_CHECK(m) \
rdb_check_mutex_call_result(__MYROCKS_PORTABLE_PRETTY_FUNCTION__, true, \
mysql_mutex_lock(&m))
-#define RDB_MUTEX_UNLOCK_CHECK(m) \
+#define RDB_MUTEX_UNLOCK_CHECK(m) \
rdb_check_mutex_call_result(__MYROCKS_PORTABLE_PRETTY_FUNCTION__, false, \
mysql_mutex_unlock(&m))
@@ -243,10 +244,10 @@ inline void rdb_check_mutex_call_result(const char *function_name,
const int result) {
if (unlikely(result)) {
/* NO_LINT_DEBUG */
- sql_print_error("%s a mutex inside %s failed with an "
- "error code %d.",
- attempt_lock ? "Locking" : "Unlocking", function_name,
- result);
+ sql_print_error(
+ "%s a mutex inside %s failed with an "
+ "error code %d.",
+ attempt_lock ? "Locking" : "Unlocking", function_name, result);
// This will hopefully result in a meaningful stack trace which we can use
// to efficiently debug the root cause.
@@ -291,7 +292,7 @@ const char *rdb_parse_id(const struct charset_info_st *const cs,
const char *rdb_skip_id(const struct charset_info_st *const cs, const char *str)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
-const std::vector<std::string> parse_into_tokens(const std::string& s,
+const std::vector<std::string> parse_into_tokens(const std::string &s,
const char delim);
/*
@@ -309,4 +310,26 @@ bool rdb_database_exists(const std::string &db_name);
const char *get_rocksdb_supported_compression_types();
-} // namespace myrocks
+/*
+ Helper class to make sure cleanup always happens. Helpful for complicated
+ logic where there can be multiple exits/returns requiring cleanup
+ */
+class Ensure_cleanup {
+ public:
+ explicit Ensure_cleanup(std::function<void()> cleanup)
+ : m_cleanup(cleanup), m_skip_cleanup(false) {}
+
+ ~Ensure_cleanup() {
+ if (!m_skip_cleanup) {
+ m_cleanup();
+ }
+ }
+
+ // If you want to skip cleanup (such as when the operation is successful)
+ void skip() { m_skip_cleanup = true; }
+
+ private:
+ std::function<void()> m_cleanup;
+ bool m_skip_cleanup;
+};
+} // namespace myrocks
diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb
-Subproject 926f3a78a64b327475ee6c60b6c8ab4f3425320
+Subproject ba64a4cf52cce5cf180135e5aeddaa90b7887f9