summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Petrunia <psergey@askmonty.org>2017-09-18 14:06:01 +0300
committerSergei Petrunia <psergey@askmonty.org>2017-09-18 14:06:01 +0300
commitba3209e219370342c0a0c48324d31e8e2611ecde (patch)
tree88ed4442b23c6559952a766bc5ca4f53eeb2360c
parent5ccaabe9620bcf9e960ae019d7e27e5998a772bd (diff)
parent3fae64b196cfb94ac4084d02c5745285589c6b48 (diff)
downloadmariadb-git-ba3209e219370342c0a0c48324d31e8e2611ecde.tar.gz
Merge mergetrees/merge-myrocks into bb-10.2-mariarocks-merge
Upstream cset we are merging from: commit 184a4a2d82f4f6f3cbcb1015bcdb32bebe73315c Author: Abhinav Sharma <abhinavsharma@fb.com> Date: Thu Sep 14 11:40:08 2017 -0700 Bump rocksdb submodule Summary: Bump rocksdb to include the fix for rocksdb.trx_info_rpl Lots of conflicts, got the code to compile but tests are likely to be broken
-rw-r--r--sql/share/errmsg-utf8.txt61
-rw-r--r--storage/rocksdb/CMakeLists.txt52
-rw-r--r--storage/rocksdb/README24
-rw-r--r--storage/rocksdb/build_rocksdb.cmake109
-rw-r--r--storage/rocksdb/ha_rocksdb.cc3542
-rw-r--r--storage/rocksdb/ha_rocksdb.h248
-rw-r--r--storage/rocksdb/ha_rocksdb_proto.h15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/my.cnf3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result56
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result796
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result796
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result28
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result45
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result101
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result12
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result73
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result1009
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result51
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result490
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/drop_table.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result80
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/hermitage.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index.result27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result48
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result1419
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index_primary.result23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/information_schema.result9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue111.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result161
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/issue255.result21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result27
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result37
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result397
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result76
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result130
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/show_engine.result179
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/singledelete.result22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result489
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result238
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result256
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result709
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result494
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result82
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result389
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result35
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result260
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/write_sync.result31
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test55
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test23
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc156
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test119
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test65
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test136
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/collation.test39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test79
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test153
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/delete.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table.test10
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test11
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test120
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index.inc34
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test109
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test70
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/index_primary.test32
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/information_schema.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/insert.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test80
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue255.test19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/issue314.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test34
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test96
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test48
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/show_engine.test13
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/singledelete.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test545
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test371
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test253
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test780
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test500
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test87
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test300
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test8
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/update.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test131
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/t/write_sync.test31
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh16
-rwxr-xr-xstorage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh31
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result9
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result39
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result24
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result25
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt (renamed from storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-mater.opt)0
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test29
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test5
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test72
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test44
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result63
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_background_sync_basic.result68
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_base_background_compactions_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result100
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result79
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_disable_2pc_basic.result)0
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result63
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result42
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result68
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result50
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result86
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result64
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result14
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_flushes_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result46
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result53
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result93
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result97
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result85
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result108
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result19
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_writes_basic.result7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test1
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_base_background_compactions_basic.test7
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_disable_2pc_basic.test)0
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_background_sync_basic.test)4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test15
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test3
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test4
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test20
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test18
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_flushes_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test (renamed from storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_compactions_basic.test)2
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test17
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test49
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test22
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test94
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test21
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_writes_basic.test6
-rw-r--r--storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test26
-rw-r--r--storage/rocksdb/rdb_buff.h26
-rw-r--r--storage/rocksdb/rdb_cf_manager.cc87
-rw-r--r--storage/rocksdb/rdb_cf_manager.h27
-rw-r--r--storage/rocksdb/rdb_cf_options.cc49
-rw-r--r--storage/rocksdb/rdb_cf_options.h9
-rw-r--r--storage/rocksdb/rdb_compact_filter.h125
-rw-r--r--storage/rocksdb/rdb_datadic.cc1539
-rw-r--r--storage/rocksdb/rdb_datadic.h377
-rw-r--r--storage/rocksdb/rdb_i_s.cc361
-rw-r--r--storage/rocksdb/rdb_index_merge.cc208
-rw-r--r--storage/rocksdb/rdb_index_merge.h70
-rw-r--r--storage/rocksdb/rdb_io_watchdog.cc233
-rw-r--r--storage/rocksdb/rdb_io_watchdog.h113
-rw-r--r--storage/rocksdb/rdb_mariadb_server_port.h33
-rw-r--r--storage/rocksdb/rdb_perf_context.cc56
-rw-r--r--storage/rocksdb/rdb_perf_context.h16
-rw-r--r--storage/rocksdb/rdb_sst_info.cc227
-rw-r--r--storage/rocksdb/rdb_sst_info.h130
-rw-r--r--storage/rocksdb/rdb_utils.cc14
-rw-r--r--storage/rocksdb/rdb_utils.h3
m---------storage/rocksdb/rocksdb0
-rw-r--r--storage/rocksdb/rocksdb-range-access.txt145
277 files changed, 22034 insertions, 2867 deletions
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index 1f4cf315f1c..a42ed90fc9f 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -7455,3 +7455,64 @@ ER_SLAVE_SAME_ID
eng "A slave with the same server_uuid/server_id as this slave has connected to the master"
ER_FLASHBACK_NOT_SUPPORTED
eng "Flashback does not support %s %s"
+
+ER_ROLLBACK_ONLY
+ eng "This transaction was rolled back and cannot be committed. Only supported operation is to roll it back, so all pending changes will be discarded. Please restart another transaction."
+
+ER_ROLLBACK_TO_SAVEPOINT
+ eng "MyRocks currently does not support ROLLBACK TO SAVEPOINT if modifying rows."
+
+ER_KEYS_OUT_OF_ORDER
+ eng "Keys are out order during bulk load"
+ER_OVERLAPPING_KEYS
+ eng "Bulk load rows overlap existing rows"
+ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
+ eng "Only REPEATABLE READ isolation level is supported for START TRANSACTION WITH CONSISTENT SNAPSHOT in RocksDB Storage Engine."
+
+ER_CF_DIFFERENT
+ eng "Column family ('%s') flag (%d) is different from an existing flag (%d). Assign a new CF flag, or do not change existing CF flag."
+
+ER_RDB_TTL_DURATION_FORMAT
+ eng "TTL duration (%s) in MyRocks must be an unsigned non-null 64-bit integer."
+
+ER_RDB_STATUS_GENERAL
+ eng "Status error %d received from RocksDB: %s"
+
+ER_RDB_STATUS_MSG
+ eng "%s, Status error %d received from RocksDB: %s"
+
+ER_RDB_TTL_UNSUPPORTED
+ eng "TTL support is currently disabled when table has a hidden PK."
+
+ER_RDB_TTL_COL_FORMAT
+ eng "TTL column (%s) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration."
+
+ER_PER_INDEX_CF_DEPRECATED
+ eng "The per-index column family option has been deprecated"
+
+
+ER_UNSUPPORTED_COLLATION
+ eng "Unsupported collation on string indexed column %s.%s Use binary collation (%s)."
+
+ER_METADATA_INCONSISTENCY
+ eng "Table '%s' does not exist, but metadata information exists inside MyRocks. This is a sign of data inconsistency. Please check if '%s.frm' exists, and try to restore it if it does not exist."
+
+ER_ISOLATION_MODE_NOT_SUPPORTED
+ eng "MyRocks supports only READ COMMITTED and REPEATABLE READ isolation levels. Please change from current isolation level %s"
+
+ER_REQUIRE_ROW_BINLOG_FORMAT
+ eng "Can't execute updates on master with binlog_format != ROW."
+
+ER_ON_DUPLICATE_DISABLED
+ eng "When unique checking is disabled in MyRocks, INSERT,UPDATE,LOAD statements with clauses that update or replace the key (i.e. INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not allowed. Query: %s"
+ER_UPDATES_WITH_CONSISTENT_SNAPSHOT
+ eng "Can't execute updates when you started a transaction with START TRANSACTION WITH CONSISTENT [ROCKSDB] SNAPSHOT."
+
+ER_KEY_CREATE_DURING_ALTER
+ eng "MyRocks failed creating new key definitions during alter."
+
+ER_SK_POPULATE_DURING_ALTER
+ eng "MyRocks failed populating secondary key during alter."
+
+
+
diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt
index 3223e084709..f6d6621824b 100644
--- a/storage/rocksdb/CMakeLists.txt
+++ b/storage/rocksdb/CMakeLists.txt
@@ -16,6 +16,12 @@ IF (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/Makefile")
SKIP_ROCKSDB_PLUGIN("Missing Makefile in rocksdb directory. Try \"git submodule update\".")
ENDIF()
+CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
+IF(HAVE_SCHED_GETCPU)
+ ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1 -DROCKSDB_SCHED_GETCPU_PRESENT)
+ENDIF()
+
+
# We've had our builders hang during the build process. This prevents MariaRocks
# to be built on 32 bit intel OS kernels.
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "i[36]86")
@@ -84,6 +90,9 @@ SET(ROCKSDB_SE_SOURCES
rdb_psi.cc
)
+# This is a strong requirement coming from RocksDB. No conditional checks here.
+ADD_DEFINITIONS(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX
+)
MYSQL_ADD_PLUGIN(rocksdb ${ROCKSDB_SE_SOURCES} STORAGE_ENGINE
MODULE_OUTPUT_NAME ha_rocksdb
COMPONENT rocksdb-engine)
@@ -93,12 +102,32 @@ IF(NOT TARGET rocksdb)
RETURN()
ENDIF()
-# MARIAROCKS-TODO: ???
-CHECK_FUNCTION_EXISTS(fallocate HAVE_FALLOCATE)
-IF(HAVE_FALLOCATE)
- ADD_DEFINITIONS(-DROCKSDB_FALLOCATE_PRESENT)
+IF(UNIX)
+ IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ ADD_DEFINITIONS(-DOS_LINUX)
+
+ CHECK_INCLUDE_FILES(linux/falloc.h HAVE_LINUX_FALLOC_H)
+
+ CHECK_FUNCTION_EXISTS(fallocate HAVE_FALLOCATE)
+
+ IF(HAVE_FALLOCATE AND HAVE_LINUX_FALLOC_H)
+ ADD_DEFINITIONS(-DROCKSDB_FALLOCATE_PRESENT)
+ ENDIF()
+ ENDIF()
ENDIF()
+CHECK_CXX_SOURCE_COMPILES("
+#if defined(_MSC_VER) && !defined(__thread)
+#define __thread __declspec(thread)
+#endif
+int main() {
+ static __thread int tls;
+}
+" HAVE_THREAD_LOCAL)
+if(HAVE_THREAD_LOCAL)
+ ADD_DEFINITIONS(-DROCKSDB_SUPPORT_THREAD_LOCAL)
+endif()
+
INCLUDE(build_rocksdb.cmake)
ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib
@@ -147,6 +176,12 @@ IF(HAVE_SCHED_GETCPU)
ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1)
ENDIF()
+IF (NOT "$ENV{WITH_TBB}" STREQUAL "")
+ SET(rocksdb_static_libs ${rocksdb_static_libs}
+ $ENV{WITH_TBB}/libtbb${PIC_EXT}.a)
+ ADD_DEFINITIONS(-DTBB)
+ENDIF()
+
#
# MariaDB: Dynamic plugin build is not suitable with unittest ATM
#
@@ -154,6 +189,15 @@ ENDIF()
# ADD_SUBDIRECTORY(unittest)
#ENDIF()
+CHECK_INCLUDE_FILES(zlib.h HAVE_ZLIB_H)
+
+IF (HAVE_ZLIB_H)
+ ADD_DEFINITIONS(-DZLIB)
+ SET(rocksdb_static_libs ${rocksdb_static_libs} ${ZLIB_LIBRARY})
+ENDIF()
+
+SET(rocksdb_static_libs ${rocksdb_static_libs} "-lrt")
+
ADD_LIBRARY(rocksdb_tools STATIC
rocksdb/tools/ldb_tool.cc
rocksdb/tools/ldb_cmd.cc
diff --git a/storage/rocksdb/README b/storage/rocksdb/README
index 472b7986f91..3af455924a4 100644
--- a/storage/rocksdb/README
+++ b/storage/rocksdb/README
@@ -1,12 +1,20 @@
== Summary ==
-This directory contains RocksDB-based Storage Engine (RDBSE) for MySQL = "MyRocks".
+This directory contains RocksDB-based Storage Engine (RDBSE) for MySQL,
+also known as "MyRocks".
== Resources ==
-See https://github.com/facebook/mysql-5.6/wiki/Getting-Started-with-MyRocks
-Facebook group: https://www.facebook.com/groups/mysqlonrocksdb/
+https://github.com/facebook/mysql-5.6/wiki/Getting-Started-with-MyRocks
+https://www.facebook.com/groups/MyRocks/
== Coding Conventions ==
-The baseline for MyRocks coding conventions is the MySQL set, available at
+The baseline for MyRocks coding conventions for the code in storage/rocksdb/
+is based on the default clang format with a few minor changes. The file
+storage/rocksdb/.clang-format describes conventions and can be integrated
+with Vim or Emacs as described here:
+http://releases.llvm.org/3.6.0/tools/clang/docs/ClangFormat.html#vim-integration
+
+All code outside of storage/rocksdb/ should conform to the MySQL coding
+conventions:
http://dev.mysql.com/doc/internals/en/coding-guidelines.html.
Several refinements:
@@ -34,5 +42,9 @@ Several refinements:
"_vect" for a std::vector etc.
== Running Tests ==
-To run tests from rocksdb, rocksd_rpl or rocksdb_sys_vars packages, use the following parameters:
---mysqld=--default-storage-engine=rocksdb --mysqld=--skip-innodb --mysqld=--default-tmp-storage-engine=MyISAM --mysqld=--rocksdb \ No newline at end of file
+To run tests from rocksdb, rocksdb_rpl or other rocksdb_* packages, use the
+following parameters:
+ --default-storage-engine=rocksdb
+ --skip-innodb
+ --default-tmp-storage-engine=MyISAM
+ --rocksdb
diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake
index 8b7e64a8e08..462be5ae242 100644
--- a/storage/rocksdb/build_rocksdb.cmake
+++ b/storage/rocksdb/build_rocksdb.cmake
@@ -143,7 +143,8 @@ set(LIBS ${ROCKSDB_LIBS} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
# Main library source code
set(ROCKSDB_SOURCES
- db/auto_roll_logger.cc
+ cache/lru_cache.cc
+ cache/sharded_cache.cc
db/builder.cc
db/c.cc
db/column_family.cc
@@ -152,19 +153,23 @@ set(ROCKSDB_SOURCES
db/compaction_iterator.cc
db/compaction_job.cc
db/compaction_picker.cc
+ db/compaction_picker_universal.cc
db/convenience.cc
- db/dbformat.cc
db/db_filesnapshot.cc
db/db_impl.cc
+ db/db_impl_compaction_flush.cc
db/db_impl_debug.cc
db/db_impl_experimental.cc
+ db/db_impl_files.cc
+ db/db_impl_open.cc
db/db_impl_readonly.cc
+ db/db_impl_write.cc
db/db_info_dumper.cc
db/db_iter.cc
+ db/dbformat.cc
db/event_helpers.cc
- db/external_sst_file_ingestion_job.cc
db/experimental.cc
- db/filename.cc
+ db/external_sst_file_ingestion_job.cc
db/file_indexer.cc
db/flush_job.cc
db/flush_scheduler.cc
@@ -172,9 +177,9 @@ set(ROCKSDB_SOURCES
db/internal_stats.cc
db/log_reader.cc
db/log_writer.cc
+ db/malloc_stats.cc
db/managed_iterator.cc
db/memtable.cc
- db/memtable_allocator.cc
db/memtable_list.cc
db/merge_helper.cc
db/merge_operator.cc
@@ -192,11 +197,39 @@ set(ROCKSDB_SOURCES
db/write_batch_base.cc
db/write_controller.cc
db/write_thread.cc
+ env/env.cc
+ env/env_chroot.cc
+ env/env_hdfs.cc
+ env/mock_env.cc
+ memtable/alloc_tracker.cc
+ memtable/hash_cuckoo_rep.cc
memtable/hash_cuckoo_rep.cc
memtable/hash_linklist_rep.cc
+ memtable/hash_linklist_rep.cc
+ memtable/hash_skiplist_rep.cc
memtable/hash_skiplist_rep.cc
memtable/skiplistrep.cc
+ memtable/skiplistrep.cc
+ memtable/vectorrep.cc
memtable/vectorrep.cc
+ memtable/write_buffer_manager.cc
+ monitoring/histogram.cc
+ monitoring/histogram_windowing.cc
+ monitoring/instrumented_mutex.cc
+ monitoring/iostats_context.cc
+ monitoring/perf_context.cc
+ monitoring/perf_level.cc
+ monitoring/statistics.cc
+ monitoring/thread_status_impl.cc
+ monitoring/thread_status_updater.cc
+ monitoring/thread_status_util.cc
+ monitoring/thread_status_util_debug.cc
+ options/cf_options.cc
+ options/db_options.cc
+ options/options.cc
+ options/options_helper.cc
+ options/options_parser.cc
+ options/options_sanity_check.cc
port/stack_trace.cc
table/adaptive_table_factory.cc
table/block.cc
@@ -214,65 +247,47 @@ set(ROCKSDB_SOURCES
table/format.cc
table/full_filter_block.cc
table/get_context.cc
+ table/index_builder.cc
table/iterator.cc
table/merging_iterator.cc
- table/sst_file_writer.cc
table/meta_blocks.cc
+ table/partitioned_filter_block.cc
+ table/persistent_cache_helper.cc
table/plain_table_builder.cc
table/plain_table_factory.cc
table/plain_table_index.cc
table/plain_table_key_coding.cc
table/plain_table_reader.cc
- table/persistent_cache_helper.cc
+ table/sst_file_writer.cc
table/table_properties.cc
table/two_level_iterator.cc
- tools/sst_dump_tool.cc
tools/db_bench_tool.cc
tools/dump/db_dump_tool.cc
+ tools/ldb_cmd.cc
+ tools/ldb_tool.cc
+ tools/sst_dump_tool.cc
util/arena.cc
+ util/auto_roll_logger.cc
util/bloom.cc
- util/cf_options.cc
- util/clock_cache.cc
util/coding.cc
util/compaction_job_stats_impl.cc
util/comparator.cc
util/concurrent_arena.cc
util/crc32c.cc
- util/db_options.cc
util/delete_scheduler.cc
util/dynamic_bloom.cc
- util/env.cc
- util/env_chroot.cc
- util/env_hdfs.cc
util/event_logger.cc
- util/file_util.cc
util/file_reader_writer.cc
- util/sst_file_manager_impl.cc
+ util/file_util.cc
+ util/filename.cc
util/filter_policy.cc
util/hash.cc
- util/histogram.cc
- util/histogram_windowing.cc
- util/instrumented_mutex.cc
- util/iostats_context.cc
-
- util/lru_cache.cc
- tools/ldb_cmd.cc
- tools/ldb_tool.cc
- util/logging.cc
util/log_buffer.cc
- util/memenv.cc
util/murmurhash.cc
- util/options.cc
- util/options_helper.cc
- util/options_parser.cc
- util/options_sanity_check.cc
- util/perf_context.cc
- util/perf_level.cc
util/random.cc
util/rate_limiter.cc
- util/sharded_cache.cc
util/slice.cc
- util/statistics.cc
+ util/sst_file_manager_impl.cc
util/status.cc
util/status_message.cc
util/string_util.cc
@@ -280,17 +295,17 @@ set(ROCKSDB_SOURCES
util/testutil.cc
util/thread_local.cc
util/threadpool_imp.cc
- util/thread_status_impl.cc
- util/thread_status_updater.cc
- util/thread_status_util.cc
- util/thread_status_util_debug.cc
util/transaction_test_util.cc
util/xxhash.cc
utilities/backupable/backupable_db.cc
utilities/blob_db/blob_db.cc
- utilities/checkpoint/checkpoint.cc
+ utilities/checkpoint/checkpoint_impl.cc
+ utilities/col_buf_decoder.cc
+ utilities/col_buf_encoder.cc
+ utilities/column_aware_encoding_util.cc
utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
utilities/date_tiered/date_tiered_db_impl.cc
+ utilities/debug.cc
utilities/document/document_db.cc
utilities/document/json_document.cc
utilities/document/json_document_builder.cc
@@ -299,10 +314,10 @@ set(ROCKSDB_SOURCES
utilities/leveldb_options/leveldb_options.cc
utilities/lua/rocks_lua_compaction_filter.cc
utilities/memory/memory_util.cc
+ utilities/merge_operators/max.cc
+ utilities/merge_operators/put.cc
utilities/merge_operators/string_append/stringappend.cc
utilities/merge_operators/string_append/stringappend2.cc
- utilities/merge_operators/put.cc
- utilities/merge_operators/max.cc
utilities/merge_operators/uint64add.cc
utilities/option_change_migration/option_change_migration.cc
utilities/options/options_util.cc
@@ -315,20 +330,15 @@ set(ROCKSDB_SOURCES
utilities/simulator_cache/sim_cache.cc
utilities/spatialdb/spatial_db.cc
utilities/table_properties_collectors/compact_on_deletion_collector.cc
- utilities/transactions/optimistic_transaction_impl.cc
utilities/transactions/optimistic_transaction_db_impl.cc
+ utilities/transactions/pessimistic_transaction_db.cc
utilities/transactions/transaction_base.cc
- utilities/transactions/transaction_impl.cc
- utilities/transactions/transaction_db_impl.cc
utilities/transactions/transaction_db_mutex_impl.cc
utilities/transactions/transaction_lock_mgr.cc
utilities/transactions/transaction_util.cc
utilities/ttl/db_ttl_impl.cc
utilities/write_batch_with_index/write_batch_with_index.cc
utilities/write_batch_with_index/write_batch_with_index_internal.cc
- utilities/col_buf_encoder.cc
- utilities/col_buf_decoder.cc
- utilities/column_aware_encoding_util.cc
)
if(WIN32)
@@ -343,8 +353,8 @@ if(WIN32)
else()
list(APPEND ROCKSDB_SOURCES
port/port_posix.cc
- util/env_posix.cc
- util/io_posix.cc)
+ env/env_posix.cc
+ env/io_posix.cc)
endif()
SET(SOURCES)
FOREACH(s ${ROCKSDB_SOURCES})
@@ -364,3 +374,4 @@ target_link_libraries(rocksdblib ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set_target_properties(rocksdblib PROPERTIES COMPILE_FLAGS "-fPIC -fno-builtin-memcmp -frtti")
endif()
+
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 981acdf22b0..fd6105ae3c2 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -32,6 +32,8 @@
/* C++ standard header files */
#include <algorithm>
+#include <limits>
+#include <map>
#include <queue>
#include <set>
#include <string>
@@ -41,6 +43,7 @@
#include "./debug_sync.h"
#include "./my_bit.h"
#include "./my_stacktrace.h"
+#include "./my_sys.h"
#include "./sql_audit.h"
#include "./sql_table.h"
#include "./sql_hset.h"
@@ -50,14 +53,23 @@
#endif
#include <mysys_err.h>
+// Both MySQL and RocksDB define the same constant. To avoid compilation errors
+// until we make the fix in RocksDB, we'll temporarily undefine it here.
+#undef CACHE_LINE_SIZE
+
/* RocksDB includes */
+#include "monitoring/histogram.h"
#include "rocksdb/compaction_filter.h"
+#include "rocksdb/env.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/rate_limiter.h"
#include "rocksdb/slice_transform.h"
+#include "rocksdb/thread_status.h"
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/convenience.h"
#include "rocksdb/utilities/memory_util.h"
+#include "rocksdb/utilities/sim_cache.h"
+#include "util/stop_watch.h"
/* MyRocks includes */
#include "./event_listener.h"
@@ -101,6 +113,12 @@ namespace myrocks {
static st_global_stats global_stats;
static st_export_stats export_stats;
+static st_memory_stats memory_stats;
+static st_io_stall_stats io_stall_stats;
+
+const std::string DEFAULT_CF_NAME("default");
+const std::string DEFAULT_SYSTEM_CF_NAME("__system__");
+const std::string PER_INDEX_CF_NAME("$per_index_cf");
/**
Updates row counters based on the table type and operation type.
@@ -121,14 +139,22 @@ static handler *rocksdb_create_handler(my_core::handlerton *hton,
bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
const rocksdb::Slice &eq_cond,
- const bool use_all_keys, bool is_ascending);
+ const bool use_all_keys);
+
+static rocksdb::CompactRangeOptions getCompactRangeOptions() {
+ rocksdb::CompactRangeOptions compact_range_options;
+ compact_range_options.bottommost_level_compaction =
+ rocksdb::BottommostLevelCompaction::kForce;
+ compact_range_options.exclusive_manual_compaction = false;
+ return compact_range_options;
+}
///////////////////////////////////////////////////////////
// Parameters and settings
///////////////////////////////////////////////////////////
-static char *rocksdb_default_cf_options;
-static char *rocksdb_override_cf_options;
-Rdb_cf_options rocksdb_cf_options_map;
+static char *rocksdb_default_cf_options = nullptr;
+static char *rocksdb_override_cf_options = nullptr;
+static char *rocksdb_update_cf_options = nullptr;
///////////////////////////////////////////////////////////
// Globals
@@ -136,6 +162,7 @@ Rdb_cf_options rocksdb_cf_options_map;
handlerton *rocksdb_hton;
rocksdb::TransactionDB *rdb = nullptr;
+rocksdb::HistogramImpl *commit_latency_stats = nullptr;
static std::shared_ptr<rocksdb::Statistics> rocksdb_stats;
static std::unique_ptr<rocksdb::Env> flashcache_aware_env;
@@ -144,8 +171,8 @@ static std::shared_ptr<Rdb_tbl_prop_coll_factory> properties_collector_factory;
Rdb_dict_manager dict_manager;
Rdb_cf_manager cf_manager;
Rdb_ddl_manager ddl_manager;
-const char *m_mysql_gtid;
Rdb_binlog_manager binlog_manager;
+Rdb_io_watchdog *io_watchdog = nullptr;
/**
MyRocks background thread control
@@ -159,11 +186,7 @@ static Rdb_background_thread rdb_bg_thread;
// collation check requirement.
Regex_list_handler *rdb_collation_exceptions;
-static const char *const ERRSTR_ROLLBACK_ONLY =
- "This transaction was rolled back and cannot be "
- "committed. Only supported operation is to roll it back, "
- "so all pending changes will be discarded. "
- "Please restart another transaction.";
+static const char **rdb_get_error_messages(int nr);
static void rocksdb_flush_all_memtables() {
const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
@@ -186,12 +209,11 @@ static int rocksdb_compact_column_family(THD *const thd,
DBUG_ASSERT(value != nullptr);
if (const char *const cf = value->val_str(value, buff, &len)) {
- bool is_automatic;
- auto cfh = cf_manager.get_cf(cf, "", nullptr, &is_automatic);
+ auto cfh = cf_manager.get_cf(cf);
if (cfh != nullptr && rdb != nullptr) {
sql_print_information("RocksDB: Manual compaction of column family: %s\n",
cf);
- rdb->CompactRange(rocksdb::CompactRangeOptions(), cfh, nullptr, nullptr);
+ rdb->CompactRange(getCompactRangeOptions(), cfh, nullptr, nullptr);
}
}
return HA_EXIT_SUCCESS;
@@ -252,30 +274,27 @@ static int rocksdb_create_checkpoint(
checkpoint_dir.c_str());
rocksdb::Checkpoint *checkpoint;
auto status = rocksdb::Checkpoint::Create(rdb, &checkpoint);
+ // We can only return HA_EXIT_FAILURE/HA_EXIT_SUCCESS here which is why
+ // the return code is ignored, but by calling into rdb_error_to_mysql,
+  // it will call my_error for us, which will propagate up to the client.
+ int rc __attribute__((__unused__));
if (status.ok()) {
status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str());
+ delete checkpoint;
if (status.ok()) {
sql_print_information(
"RocksDB: created checkpoint in directory : %s\n",
checkpoint_dir.c_str());
+ return HA_EXIT_SUCCESS;
} else {
- my_printf_error(
- ER_UNKNOWN_ERROR,
- "RocksDB: Failed to create checkpoint directory. status %d %s",
- MYF(0), status.code(), status.ToString().c_str());
+ rc = ha_rocksdb::rdb_error_to_mysql(status);
}
- delete checkpoint;
} else {
- const std::string err_text(status.ToString());
- my_printf_error(
- ER_UNKNOWN_ERROR,
- "RocksDB: failed to initialize checkpoint. status %d %s\n", MYF(0),
- status.code(), err_text.c_str());
+ rc = ha_rocksdb::rdb_error_to_mysql(status);
}
- return status.code();
}
}
- return HA_ERR_INTERNAL_ERROR;
+ return HA_EXIT_FAILURE;
}
/* This method is needed to indicate that the
@@ -292,8 +311,48 @@ static void rocksdb_force_flush_memtable_now_stub(
static int rocksdb_force_flush_memtable_now(
THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
struct st_mysql_value *const value) {
- sql_print_information("RocksDB: Manual memtable flush\n");
+ sql_print_information("RocksDB: Manual memtable flush.");
+ rocksdb_flush_all_memtables();
+ return HA_EXIT_SUCCESS;
+}
+
+static void rocksdb_force_flush_memtable_and_lzero_now_stub(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ const void *const save) {}
+
+static int rocksdb_force_flush_memtable_and_lzero_now(
+ THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr,
+ struct st_mysql_value *const value) {
+ sql_print_information("RocksDB: Manual memtable and L0 flush.");
rocksdb_flush_all_memtables();
+
+ const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+ rocksdb::CompactionOptions c_options = rocksdb::CompactionOptions();
+ rocksdb::ColumnFamilyMetaData metadata;
+ rocksdb::ColumnFamilyDescriptor cf_descr;
+
+ for (const auto &cf_handle : cf_manager.get_all_cf()) {
+ rdb->GetColumnFamilyMetaData(cf_handle, &metadata);
+ cf_handle->GetDescriptor(&cf_descr);
+ c_options.output_file_size_limit = cf_descr.options.target_file_size_base;
+
+ DBUG_ASSERT(metadata.levels[0].level == 0);
+ std::vector<std::string> file_names;
+ for (auto &file : metadata.levels[0].files) {
+ file_names.emplace_back(file.db_path + file.name);
+ }
+
+ if (!file_names.empty()) {
+ rocksdb::Status s;
+ s = rdb->CompactFiles(c_options, cf_handle, file_names, 1);
+
+ if (!s.ok() && !s.IsAborted()) {
+ rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
+ return HA_EXIT_FAILURE;
+ }
+ }
+ }
+
return HA_EXIT_SUCCESS;
}
@@ -336,41 +395,66 @@ static void rocksdb_set_rate_limiter_bytes_per_sec(THD *thd,
void *var_ptr,
const void *save);
+static void rocksdb_set_sst_mgr_rate_bytes_per_sec(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr,
+ const void *save);
+
static void rocksdb_set_delayed_write_rate(THD *thd,
struct st_mysql_sys_var *var,
void *var_ptr, const void *save);
+static void rocksdb_set_max_latest_deadlocks(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
static void rdb_set_collation_exception_list(const char *exception_list);
static void rocksdb_set_collation_exception_list(THD *thd,
struct st_mysql_sys_var *var,
void *var_ptr,
const void *save);
+void rocksdb_set_update_cf_options(THD *thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr,
+ const void *save);
+
static void
rocksdb_set_bulk_load(THD *thd,
struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
void *var_ptr, const void *save);
-static void rocksdb_set_max_background_compactions(
- THD *thd, struct st_mysql_sys_var *const var, void *const var_ptr,
- const void *const save);
+static void rocksdb_set_bulk_load_allow_unsorted(
+ THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)),
+ void *var_ptr, const void *save);
+
+static void rocksdb_set_max_background_jobs(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save);
//////////////////////////////////////////////////////////////////////////////
// Options definitions
//////////////////////////////////////////////////////////////////////////////
static long long rocksdb_block_cache_size;
+static long long rocksdb_sim_cache_size;
+static my_bool rocksdb_use_clock_cache;
/* Use unsigned long long instead of uint64_t because of MySQL compatibility */
-static unsigned long long // NOLINT(runtime/int)
+static unsigned long long // NOLINT(runtime/int)
rocksdb_rate_limiter_bytes_per_sec;
+static unsigned long long // NOLINT(runtime/int)
+ rocksdb_sst_mgr_rate_bytes_per_sec;
static unsigned long long rocksdb_delayed_write_rate;
-static unsigned long // NOLINT(runtime/int)
+static uint32_t rocksdb_max_latest_deadlocks;
+static unsigned long // NOLINT(runtime/int)
rocksdb_persistent_cache_size_mb;
static ulong rocksdb_info_log_level;
static char *rocksdb_wal_dir;
static char *rocksdb_persistent_cache_path;
static ulong rocksdb_index_type;
-static char rocksdb_background_sync;
+static uint32_t rocksdb_flush_log_at_trx_commit;
static uint32_t rocksdb_debug_optimizer_n_rows;
static my_bool rocksdb_force_compute_memtable_stats;
+static uint32_t rocksdb_force_compute_memtable_stats_cachetime;
static my_bool rocksdb_debug_optimizer_no_zero_cardinality;
static uint32_t rocksdb_wal_recovery_mode;
static uint32_t rocksdb_access_hint_on_compaction_start;
@@ -382,6 +466,15 @@ static my_bool rocksdb_enable_2pc = 0;
static char *rocksdb_strict_collation_exceptions;
static my_bool rocksdb_collect_sst_properties = 1;
static my_bool rocksdb_force_flush_memtable_now_var = 0;
+static my_bool rocksdb_force_flush_memtable_and_lzero_now_var = 0;
+static my_bool rocksdb_enable_ttl = 1;
+static my_bool rocksdb_enable_ttl_read_filtering = 1;
+static int rocksdb_debug_ttl_rec_ts = 0;
+static int rocksdb_debug_ttl_snapshot_ts = 0;
+static int rocksdb_debug_ttl_read_filter_ts = 0;
+static my_bool rocksdb_debug_ttl_ignore_pk = 0;
+static my_bool rocksdb_reset_stats = 0;
+static uint32_t rocksdb_io_write_timeout_secs = 0;
static uint64_t rocksdb_number_stat_computes = 0;
static uint32_t rocksdb_seconds_between_stat_computes = 3600;
static long long rocksdb_compaction_sequential_deletes = 0l;
@@ -392,6 +485,7 @@ static char *rocksdb_datadir;
static uint32_t rocksdb_table_stats_sampling_pct;
static my_bool rocksdb_enable_bulk_load_api = 1;
static my_bool rocksdb_print_snapshot_conflict_queries = 0;
+static my_bool rocksdb_large_prefix = 0;
char *compression_types_val=
const_cast<char*>(get_rocksdb_supported_compression_types());
@@ -399,19 +493,25 @@ char *compression_types_val=
std::atomic<uint64_t> rocksdb_snapshot_conflict_errors(0);
std::atomic<uint64_t> rocksdb_wal_group_syncs(0);
-static rocksdb::DBOptions rdb_init_rocksdb_db_options(void) {
- rocksdb::DBOptions o;
+static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
+ auto o = std::unique_ptr<rocksdb::DBOptions>(new rocksdb::DBOptions());
- o.create_if_missing = true;
- o.listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager));
- o.info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
- o.max_subcompactions = DEFAULT_SUBCOMPACTIONS;
+ o->create_if_missing = true;
+ o->listeners.push_back(std::make_shared<Rdb_event_listener>(&ddl_manager));
+ o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
+ o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;
+ o->concurrent_prepare = true;
+ o->manual_wal_flush = true;
return o;
}
-static rocksdb::DBOptions rocksdb_db_options = rdb_init_rocksdb_db_options();
-static rocksdb::BlockBasedTableOptions rocksdb_tbl_options;
+/* DBOptions contains Statistics and needs to be destructed last */
+static std::unique_ptr<rocksdb::BlockBasedTableOptions> rocksdb_tbl_options =
+ std::unique_ptr<rocksdb::BlockBasedTableOptions>(
+ new rocksdb::BlockBasedTableOptions());
+static std::unique_ptr<rocksdb::DBOptions> rocksdb_db_options =
+ rdb_init_rocksdb_db_options();
static std::shared_ptr<rocksdb::RateLimiter> rocksdb_rate_limiter;
@@ -431,11 +531,55 @@ static void rocksdb_set_rocksdb_info_log_level(
RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
rocksdb_info_log_level = *static_cast<const uint64_t *>(save);
- rocksdb_db_options.info_log->SetInfoLogLevel(
+ rocksdb_db_options->info_log->SetInfoLogLevel(
static_cast<const rocksdb::InfoLogLevel>(rocksdb_info_log_level));
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
+static void rocksdb_set_reset_stats(
+ my_core::THD *const /* unused */,
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr, const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rdb != nullptr);
+ DBUG_ASSERT(rocksdb_stats != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+
+ if (rocksdb_reset_stats) {
+ rocksdb::Status s = rdb->ResetStats();
+
+ // RocksDB will always return success. Let's document this assumption here
+  // as well so that we'll get immediately notified when the contract changes.
+ DBUG_ASSERT(s == rocksdb::Status::OK());
+
+ s = rocksdb_stats->Reset();
+ DBUG_ASSERT(s == rocksdb::Status::OK());
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+static void rocksdb_set_io_write_timeout(
+ my_core::THD *const thd MY_ATTRIBUTE((__unused__)),
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rdb != nullptr);
+ DBUG_ASSERT(io_watchdog != nullptr);
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const uint32_t new_val = *static_cast<const uint32_t *>(save);
+
+ rocksdb_io_write_timeout_secs = new_val;
+ io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs);
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
static const char *index_type_names[] = {"kBinarySearch", "kHashSearch", NullS};
static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1,
@@ -450,9 +594,12 @@ const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024;
const size_t RDB_MIN_MERGE_BUF_SIZE = 100;
const size_t RDB_DEFAULT_MERGE_COMBINE_READ_SIZE = 1024 * 1024 * 1024;
const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE = 100;
+const size_t RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
+const size_t RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY = 0;
const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE = 512 * 1024 * 1024;
const int64 RDB_MIN_BLOCK_CACHE_SIZE = 1024;
const int RDB_MAX_CHECKSUMS_PCT = 100;
+const ulong RDB_DEADLOCK_DETECT_DEPTH = 50;
// TODO: 0 means don't wait at all, and we don't support it yet?
static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
@@ -463,6 +610,14 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
static MYSQL_THDVAR_BOOL(deadlock_detect, PLUGIN_VAR_RQCMDARG,
"Enables deadlock detection", nullptr, nullptr, FALSE);
+static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG,
+ "Number of transactions deadlock detection will "
+ "traverse through before assuming deadlock",
+ nullptr, nullptr,
+ /*default*/ RDB_DEADLOCK_DETECT_DEPTH,
+ /*min*/ 2,
+ /*max*/ ULONG_MAX, 0);
+
static MYSQL_THDVAR_BOOL(
trace_sst_api, PLUGIN_VAR_RQCMDARG,
"Generate trace output in the log for each call to the SstFileWriter",
@@ -474,6 +629,11 @@ static MYSQL_THDVAR_BOOL(
"unique_checks and enables rocksdb_commit_in_the_middle.",
nullptr, rocksdb_set_bulk_load, FALSE);
+static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG,
+ "Allow unsorted input during bulk-load. "
+ "Can be changed only when bulk load is disabled.",
+ nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE);
+
static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Enables using SstFileWriter for bulk loading",
@@ -517,6 +677,11 @@ static MYSQL_THDVAR_ULONG(max_row_locks, PLUGIN_VAR_RQCMDARG,
/*min*/ 1,
/*max*/ RDB_MAX_ROW_LOCKS, 0);
+static MYSQL_THDVAR_ULONGLONG(
+ write_batch_max_bytes, PLUGIN_VAR_RQCMDARG,
+ "Maximum size of write batch in bytes. 0 means no limit.", nullptr, nullptr,
+ /* default */ 0, /* min */ 0, /* max */ SIZE_T_MAX, 1);
+
static MYSQL_THDVAR_BOOL(
lock_scanned_rows, PLUGIN_VAR_RQCMDARG,
"Take and hold locks on rows that are scanned but not updated", nullptr,
@@ -548,34 +713,60 @@ static MYSQL_THDVAR_ULONGLONG(
/* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE,
/* max */ SIZE_T_MAX, 1);
+static MYSQL_THDVAR_ULONGLONG(
+ merge_tmp_file_removal_delay_ms, PLUGIN_VAR_RQCMDARG,
+ "Fast index creation creates a large tmp file on disk during index "
+ "creation. Removing this large file all at once when index creation is "
+ "complete can cause trim stalls on Flash. This variable specifies a "
+ "duration to sleep (in milliseconds) between calling chsize() to truncate "
+ "the file in chunks. The chunk size is the same as merge_buf_size.",
+ nullptr, nullptr,
+ /* default (0ms) */ RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* min (0ms) */ RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY,
+ /* max */ SIZE_T_MAX, 1);
+
static MYSQL_SYSVAR_BOOL(
create_if_missing,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.create_if_missing),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->create_if_missing),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::create_if_missing for RocksDB", nullptr, nullptr,
- rocksdb_db_options.create_if_missing);
+ rocksdb_db_options->create_if_missing);
+
+static MYSQL_SYSVAR_BOOL(
+ concurrent_prepare,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->concurrent_prepare),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->concurrent_prepare);
+
+static MYSQL_SYSVAR_BOOL(
+ manual_wal_flush,
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->manual_wal_flush),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr,
+ rocksdb_db_options->manual_wal_flush);
static MYSQL_SYSVAR_BOOL(
create_missing_column_families,
*reinterpret_cast<my_bool *>(
- &rocksdb_db_options.create_missing_column_families),
+ &rocksdb_db_options->create_missing_column_families),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::create_missing_column_families for RocksDB", nullptr, nullptr,
- rocksdb_db_options.create_missing_column_families);
+ rocksdb_db_options->create_missing_column_families);
static MYSQL_SYSVAR_BOOL(
error_if_exists,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.error_if_exists),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->error_if_exists),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::error_if_exists for RocksDB", nullptr, nullptr,
- rocksdb_db_options.error_if_exists);
+ rocksdb_db_options->error_if_exists);
static MYSQL_SYSVAR_BOOL(
paranoid_checks,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.paranoid_checks),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->paranoid_checks),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::paranoid_checks for RocksDB", nullptr, nullptr,
- rocksdb_db_options.paranoid_checks);
+ rocksdb_db_options->paranoid_checks);
static MYSQL_SYSVAR_ULONGLONG(
rate_limiter_bytes_per_sec, rocksdb_rate_limiter_bytes_per_sec,
@@ -583,13 +774,28 @@ static MYSQL_SYSVAR_ULONGLONG(
nullptr, rocksdb_set_rate_limiter_bytes_per_sec, /* default */ 0L,
/* min */ 0L, /* max */ MAX_RATE_LIMITER_BYTES_PER_SEC, 0);
+static MYSQL_SYSVAR_ULONGLONG(
+ sst_mgr_rate_bytes_per_sec, rocksdb_sst_mgr_rate_bytes_per_sec,
+ PLUGIN_VAR_RQCMDARG,
+ "DBOptions::sst_file_manager rate_bytes_per_sec for RocksDB", nullptr,
+ rocksdb_set_sst_mgr_rate_bytes_per_sec,
+ /* default */ DEFAULT_SST_MGR_RATE_BYTES_PER_SEC,
+ /* min */ 0L, /* max */ UINT64_MAX, 0);
+
static MYSQL_SYSVAR_ULONGLONG(delayed_write_rate, rocksdb_delayed_write_rate,
PLUGIN_VAR_RQCMDARG,
"DBOptions::delayed_write_rate", nullptr,
rocksdb_set_delayed_write_rate,
- rocksdb_db_options.delayed_write_rate, 0,
+ rocksdb_db_options->delayed_write_rate, 0,
UINT64_MAX, 0);
+static MYSQL_SYSVAR_UINT(max_latest_deadlocks, rocksdb_max_latest_deadlocks,
+ PLUGIN_VAR_RQCMDARG,
+ "Maximum number of recent "
+ "deadlocks to store",
+ nullptr, rocksdb_set_max_latest_deadlocks,
+ rocksdb::kInitialMaxDeadlocks, 0, UINT32_MAX, 0);
+
static MYSQL_SYSVAR_ENUM(
info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG,
"Filter level for info logs to be written mysqld error log. "
@@ -615,20 +821,20 @@ static MYSQL_SYSVAR_UINT(
/* max */ (uint)rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0);
static MYSQL_SYSVAR_SIZE_T(compaction_readahead_size,
- rocksdb_db_options.compaction_readahead_size,
+ rocksdb_db_options->compaction_readahead_size,
PLUGIN_VAR_RQCMDARG,
"DBOptions::compaction_readahead_size for RocksDB",
nullptr, nullptr,
- rocksdb_db_options.compaction_readahead_size,
+ rocksdb_db_options->compaction_readahead_size,
/* min */ 0L, /* max */ SIZE_T_MAX, 0);
static MYSQL_SYSVAR_BOOL(
new_table_reader_for_compaction_inputs,
*reinterpret_cast<my_bool *>(
- &rocksdb_db_options.new_table_reader_for_compaction_inputs),
+ &rocksdb_db_options->new_table_reader_for_compaction_inputs),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::new_table_reader_for_compaction_inputs for RocksDB", nullptr,
- nullptr, rocksdb_db_options.new_table_reader_for_compaction_inputs);
+ nullptr, rocksdb_db_options->new_table_reader_for_compaction_inputs);
static MYSQL_SYSVAR_UINT(
access_hint_on_compaction_start, rocksdb_access_hint_on_compaction_start,
@@ -641,42 +847,42 @@ static MYSQL_SYSVAR_UINT(
static MYSQL_SYSVAR_BOOL(
allow_concurrent_memtable_write,
*reinterpret_cast<my_bool *>(
- &rocksdb_db_options.allow_concurrent_memtable_write),
- PLUGIN_VAR_RQCMDARG,
+ &rocksdb_db_options->allow_concurrent_memtable_write),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::allow_concurrent_memtable_write for RocksDB", nullptr, nullptr,
false);
static MYSQL_SYSVAR_BOOL(
enable_write_thread_adaptive_yield,
*reinterpret_cast<my_bool *>(
- &rocksdb_db_options.enable_write_thread_adaptive_yield),
- PLUGIN_VAR_RQCMDARG,
+ &rocksdb_db_options->enable_write_thread_adaptive_yield),
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::enable_write_thread_adaptive_yield for RocksDB", nullptr,
nullptr, false);
-static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options.max_open_files,
+static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options->max_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::max_open_files for RocksDB", nullptr,
- nullptr, rocksdb_db_options.max_open_files,
+ nullptr, rocksdb_db_options->max_open_files,
/* min */ -1, /* max */ INT_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(max_total_wal_size,
- rocksdb_db_options.max_total_wal_size,
+ rocksdb_db_options->max_total_wal_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::max_total_wal_size for RocksDB", nullptr,
- nullptr, rocksdb_db_options.max_total_wal_size,
+ nullptr, rocksdb_db_options->max_total_wal_size,
/* min */ 0, /* max */ LONGLONG_MAX, 0);
static MYSQL_SYSVAR_BOOL(
- use_fsync, *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_fsync),
+ use_fsync, *reinterpret_cast<my_bool *>(&rocksdb_db_options->use_fsync),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::use_fsync for RocksDB", nullptr, nullptr,
- rocksdb_db_options.use_fsync);
+ rocksdb_db_options->use_fsync);
static MYSQL_SYSVAR_STR(wal_dir, rocksdb_wal_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::wal_dir for RocksDB", nullptr, nullptr,
- rocksdb_db_options.wal_dir.c_str());
+ rocksdb_db_options->wal_dir.c_str());
static MYSQL_SYSVAR_STR(
persistent_cache_path, rocksdb_persistent_cache_path,
@@ -693,186 +899,170 @@ static MYSQL_SYSVAR_ULONG(
static MYSQL_SYSVAR_UINT64_T(
delete_obsolete_files_period_micros,
- rocksdb_db_options.delete_obsolete_files_period_micros,
+ rocksdb_db_options->delete_obsolete_files_period_micros,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::delete_obsolete_files_period_micros for RocksDB", nullptr,
- nullptr, rocksdb_db_options.delete_obsolete_files_period_micros,
+ nullptr, rocksdb_db_options->delete_obsolete_files_period_micros,
/* min */ 0, /* max */ LONGLONG_MAX, 0);
-static MYSQL_SYSVAR_INT(base_background_compactions,
- rocksdb_db_options.base_background_compactions,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DBOptions::base_background_compactions for RocksDB",
- nullptr, nullptr,
- rocksdb_db_options.base_background_compactions,
- /* min */ -1, /* max */ MAX_BACKGROUND_COMPACTIONS, 0);
-
-static MYSQL_SYSVAR_INT(max_background_compactions,
- rocksdb_db_options.max_background_compactions,
+static MYSQL_SYSVAR_INT(max_background_jobs,
+ rocksdb_db_options->max_background_jobs,
PLUGIN_VAR_RQCMDARG,
- "DBOptions::max_background_compactions for RocksDB",
- nullptr, rocksdb_set_max_background_compactions,
- rocksdb_db_options.max_background_compactions,
- /* min */ 1, /* max */ MAX_BACKGROUND_COMPACTIONS, 0);
-
-static MYSQL_SYSVAR_INT(max_background_flushes,
- rocksdb_db_options.max_background_flushes,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DBOptions::max_background_flushes for RocksDB",
- nullptr, nullptr,
- rocksdb_db_options.max_background_flushes,
- /* min */ 1, /* max */ MAX_BACKGROUND_FLUSHES, 0);
+ "DBOptions::max_background_jobs for RocksDB", nullptr,
+ rocksdb_set_max_background_jobs,
+ rocksdb_db_options->max_background_jobs,
+ /* min */ -1, /* max */ MAX_BACKGROUND_JOBS, 0);
static MYSQL_SYSVAR_UINT(max_subcompactions,
- rocksdb_db_options.max_subcompactions,
+ rocksdb_db_options->max_subcompactions,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::max_subcompactions for RocksDB", nullptr,
- nullptr, rocksdb_db_options.max_subcompactions,
+ nullptr, rocksdb_db_options->max_subcompactions,
/* min */ 1, /* max */ MAX_SUBCOMPACTIONS, 0);
static MYSQL_SYSVAR_SIZE_T(max_log_file_size,
- rocksdb_db_options.max_log_file_size,
+ rocksdb_db_options->max_log_file_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::max_log_file_size for RocksDB", nullptr,
- nullptr, rocksdb_db_options.max_log_file_size,
+ nullptr, rocksdb_db_options->max_log_file_size,
/* min */ 0L, /* max */ SIZE_T_MAX, 0);
static MYSQL_SYSVAR_SIZE_T(log_file_time_to_roll,
- rocksdb_db_options.log_file_time_to_roll,
+ rocksdb_db_options->log_file_time_to_roll,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::log_file_time_to_roll for RocksDB",
nullptr, nullptr,
- rocksdb_db_options.log_file_time_to_roll,
+ rocksdb_db_options->log_file_time_to_roll,
/* min */ 0L, /* max */ SIZE_T_MAX, 0);
static MYSQL_SYSVAR_SIZE_T(keep_log_file_num,
- rocksdb_db_options.keep_log_file_num,
+ rocksdb_db_options->keep_log_file_num,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::keep_log_file_num for RocksDB", nullptr,
- nullptr, rocksdb_db_options.keep_log_file_num,
+ nullptr, rocksdb_db_options->keep_log_file_num,
/* min */ 0L, /* max */ SIZE_T_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(max_manifest_file_size,
- rocksdb_db_options.max_manifest_file_size,
+ rocksdb_db_options->max_manifest_file_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::max_manifest_file_size for RocksDB",
nullptr, nullptr,
- rocksdb_db_options.max_manifest_file_size,
+ rocksdb_db_options->max_manifest_file_size,
/* min */ 0L, /* max */ ULONGLONG_MAX, 0);
static MYSQL_SYSVAR_INT(table_cache_numshardbits,
- rocksdb_db_options.table_cache_numshardbits,
+ rocksdb_db_options->table_cache_numshardbits,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::table_cache_numshardbits for RocksDB",
nullptr, nullptr,
- rocksdb_db_options.table_cache_numshardbits,
+ rocksdb_db_options->table_cache_numshardbits,
/* min */ 0, /* max */ INT_MAX, 0);
-static MYSQL_SYSVAR_UINT64_T(wal_ttl_seconds, rocksdb_db_options.WAL_ttl_seconds,
+static MYSQL_SYSVAR_UINT64_T(wal_ttl_seconds, rocksdb_db_options->WAL_ttl_seconds,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::WAL_ttl_seconds for RocksDB", nullptr,
- nullptr, rocksdb_db_options.WAL_ttl_seconds,
+ nullptr, rocksdb_db_options->WAL_ttl_seconds,
/* min */ 0L, /* max */ LONGLONG_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(wal_size_limit_mb,
- rocksdb_db_options.WAL_size_limit_MB,
+ rocksdb_db_options->WAL_size_limit_MB,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::WAL_size_limit_MB for RocksDB", nullptr,
- nullptr, rocksdb_db_options.WAL_size_limit_MB,
+ nullptr, rocksdb_db_options->WAL_size_limit_MB,
/* min */ 0L, /* max */ LONGLONG_MAX, 0);
static MYSQL_SYSVAR_SIZE_T(manifest_preallocation_size,
- rocksdb_db_options.manifest_preallocation_size,
+ rocksdb_db_options->manifest_preallocation_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::manifest_preallocation_size for RocksDB",
nullptr, nullptr,
- rocksdb_db_options.manifest_preallocation_size,
+ rocksdb_db_options->manifest_preallocation_size,
/* min */ 0L, /* max */ SIZE_T_MAX, 0);
static MYSQL_SYSVAR_BOOL(
use_direct_reads,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_direct_reads),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->use_direct_reads),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::use_direct_reads for RocksDB", nullptr, nullptr,
- rocksdb_db_options.use_direct_reads);
+ rocksdb_db_options->use_direct_reads);
static MYSQL_SYSVAR_BOOL(
- use_direct_writes,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_direct_writes),
+ use_direct_io_for_flush_and_compaction,
+ *reinterpret_cast<my_bool *>(
+ &rocksdb_db_options->use_direct_io_for_flush_and_compaction),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DBOptions::use_direct_writes for RocksDB", nullptr, nullptr,
- rocksdb_db_options.use_direct_writes);
+ "DBOptions::use_direct_io_for_flush_and_compaction for RocksDB", nullptr,
+ nullptr, rocksdb_db_options->use_direct_io_for_flush_and_compaction);
static MYSQL_SYSVAR_BOOL(
allow_mmap_reads,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.allow_mmap_reads),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->allow_mmap_reads),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::allow_mmap_reads for RocksDB", nullptr, nullptr,
- rocksdb_db_options.allow_mmap_reads);
+ rocksdb_db_options->allow_mmap_reads);
static MYSQL_SYSVAR_BOOL(
allow_mmap_writes,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.allow_mmap_writes),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->allow_mmap_writes),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::allow_mmap_writes for RocksDB", nullptr, nullptr,
- rocksdb_db_options.allow_mmap_writes);
+ rocksdb_db_options->allow_mmap_writes);
static MYSQL_SYSVAR_BOOL(
is_fd_close_on_exec,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.is_fd_close_on_exec),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->is_fd_close_on_exec),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::is_fd_close_on_exec for RocksDB", nullptr, nullptr,
- rocksdb_db_options.is_fd_close_on_exec);
+ rocksdb_db_options->is_fd_close_on_exec);
static MYSQL_SYSVAR_UINT(stats_dump_period_sec,
- rocksdb_db_options.stats_dump_period_sec,
+ rocksdb_db_options->stats_dump_period_sec,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::stats_dump_period_sec for RocksDB",
nullptr, nullptr,
- rocksdb_db_options.stats_dump_period_sec,
+ rocksdb_db_options->stats_dump_period_sec,
/* min */ 0, /* max */ INT_MAX, 0);
static MYSQL_SYSVAR_BOOL(
advise_random_on_open,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.advise_random_on_open),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->advise_random_on_open),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::advise_random_on_open for RocksDB", nullptr, nullptr,
- rocksdb_db_options.advise_random_on_open);
+ rocksdb_db_options->advise_random_on_open);
static MYSQL_SYSVAR_SIZE_T(db_write_buffer_size,
- rocksdb_db_options.db_write_buffer_size,
+ rocksdb_db_options->db_write_buffer_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::db_write_buffer_size for RocksDB",
nullptr, nullptr,
- rocksdb_db_options.db_write_buffer_size,
+ rocksdb_db_options->db_write_buffer_size,
/* min */ 0L, /* max */ SIZE_T_MAX, 0);
static MYSQL_SYSVAR_BOOL(
use_adaptive_mutex,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.use_adaptive_mutex),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->use_adaptive_mutex),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::use_adaptive_mutex for RocksDB", nullptr, nullptr,
- rocksdb_db_options.use_adaptive_mutex);
+ rocksdb_db_options->use_adaptive_mutex);
-static MYSQL_SYSVAR_UINT64_T(bytes_per_sync, rocksdb_db_options.bytes_per_sync,
+static MYSQL_SYSVAR_UINT64_T(bytes_per_sync, rocksdb_db_options->bytes_per_sync,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::bytes_per_sync for RocksDB", nullptr,
- nullptr, rocksdb_db_options.bytes_per_sync,
+ nullptr, rocksdb_db_options->bytes_per_sync,
/* min */ 0L, /* max */ ULONGLONG_MAX, 0);
static MYSQL_SYSVAR_UINT64_T(wal_bytes_per_sync,
- rocksdb_db_options.wal_bytes_per_sync,
+ rocksdb_db_options->wal_bytes_per_sync,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::wal_bytes_per_sync for RocksDB", nullptr,
- nullptr, rocksdb_db_options.wal_bytes_per_sync,
+ nullptr, rocksdb_db_options->wal_bytes_per_sync,
/* min */ 0L, /* max */ ULONGLONG_MAX, 0);
static MYSQL_SYSVAR_BOOL(
enable_thread_tracking,
- *reinterpret_cast<my_bool *>(&rocksdb_db_options.enable_thread_tracking),
+ *reinterpret_cast<my_bool *>(&rocksdb_db_options->enable_thread_tracking),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DBOptions::enable_thread_tracking for RocksDB", nullptr, nullptr,
- rocksdb_db_options.enable_thread_tracking);
+ "DBOptions::enable_thread_tracking for RocksDB", nullptr, nullptr, true);
static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -882,10 +1072,26 @@ static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size,
/* max */ LONGLONG_MAX,
/* Block size */ RDB_MIN_BLOCK_CACHE_SIZE);
+static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Simulated cache size for RocksDB", nullptr,
+ nullptr,
+ /* default */ 0,
+ /* min */ 0,
+ /* max */ LONGLONG_MAX,
+ /* Block size */ 0);
+
+static MYSQL_SYSVAR_BOOL(
+ use_clock_cache,
+ rocksdb_use_clock_cache,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Use ClockCache instead of default LRUCache for RocksDB",
+ nullptr, nullptr, false);
+
static MYSQL_SYSVAR_BOOL(
cache_index_and_filter_blocks,
*reinterpret_cast<my_bool *>(
- &rocksdb_tbl_options.cache_index_and_filter_blocks),
+ &rocksdb_tbl_options->cache_index_and_filter_blocks),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB",
nullptr, nullptr, true);
@@ -901,7 +1107,7 @@ static MYSQL_SYSVAR_BOOL(
static MYSQL_SYSVAR_BOOL(
pin_l0_filter_and_index_blocks_in_cache,
*reinterpret_cast<my_bool *>(
- &rocksdb_tbl_options.pin_l0_filter_and_index_blocks_in_cache),
+ &rocksdb_tbl_options->pin_l0_filter_and_index_blocks_in_cache),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"pin_l0_filter_and_index_blocks_in_cache for RocksDB", nullptr, nullptr,
true);
@@ -910,50 +1116,50 @@ static MYSQL_SYSVAR_ENUM(index_type, rocksdb_index_type,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::index_type for RocksDB",
nullptr, nullptr,
- (ulong)rocksdb_tbl_options.index_type,
+ (ulong)rocksdb_tbl_options->index_type,
&index_type_typelib);
static MYSQL_SYSVAR_BOOL(
hash_index_allow_collision,
*reinterpret_cast<my_bool *>(
- &rocksdb_tbl_options.hash_index_allow_collision),
+ &rocksdb_tbl_options->hash_index_allow_collision),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::hash_index_allow_collision for RocksDB", nullptr,
- nullptr, rocksdb_tbl_options.hash_index_allow_collision);
+ nullptr, rocksdb_tbl_options->hash_index_allow_collision);
static MYSQL_SYSVAR_BOOL(
no_block_cache,
- *reinterpret_cast<my_bool *>(&rocksdb_tbl_options.no_block_cache),
+ *reinterpret_cast<my_bool *>(&rocksdb_tbl_options->no_block_cache),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::no_block_cache for RocksDB", nullptr, nullptr,
- rocksdb_tbl_options.no_block_cache);
+ rocksdb_tbl_options->no_block_cache);
-static MYSQL_SYSVAR_SIZE_T(block_size, rocksdb_tbl_options.block_size,
+static MYSQL_SYSVAR_SIZE_T(block_size, rocksdb_tbl_options->block_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::block_size for RocksDB",
- nullptr, nullptr, rocksdb_tbl_options.block_size,
+ nullptr, nullptr, rocksdb_tbl_options->block_size,
/* min */ 1L, /* max */ SIZE_T_MAX, 0);
static MYSQL_SYSVAR_INT(
- block_size_deviation, rocksdb_tbl_options.block_size_deviation,
+ block_size_deviation, rocksdb_tbl_options->block_size_deviation,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::block_size_deviation for RocksDB", nullptr,
- nullptr, rocksdb_tbl_options.block_size_deviation,
+ nullptr, rocksdb_tbl_options->block_size_deviation,
/* min */ 0, /* max */ INT_MAX, 0);
static MYSQL_SYSVAR_INT(
- block_restart_interval, rocksdb_tbl_options.block_restart_interval,
+ block_restart_interval, rocksdb_tbl_options->block_restart_interval,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::block_restart_interval for RocksDB", nullptr,
- nullptr, rocksdb_tbl_options.block_restart_interval,
+ nullptr, rocksdb_tbl_options->block_restart_interval,
/* min */ 1, /* max */ INT_MAX, 0);
static MYSQL_SYSVAR_BOOL(
whole_key_filtering,
- *reinterpret_cast<my_bool *>(&rocksdb_tbl_options.whole_key_filtering),
+ *reinterpret_cast<my_bool *>(&rocksdb_tbl_options->whole_key_filtering),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"BlockBasedTableOptions::whole_key_filtering for RocksDB", nullptr, nullptr,
- rocksdb_tbl_options.whole_key_filtering);
+ rocksdb_tbl_options->whole_key_filtering);
static MYSQL_SYSVAR_STR(default_cf_options, rocksdb_default_cf_options,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -964,16 +1170,27 @@ static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options,
"option overrides per cf for RocksDB", nullptr, nullptr,
"");
-static MYSQL_SYSVAR_BOOL(background_sync, rocksdb_background_sync,
- PLUGIN_VAR_RQCMDARG,
- "turns on background syncs for RocksDB", nullptr,
- nullptr, FALSE);
+static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC
+ /* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/,
+ "Option updates per column family for RocksDB", nullptr,
+ rocksdb_set_update_cf_options, nullptr);
+
+enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
+ FLUSH_LOG_NEVER = 0,
+ FLUSH_LOG_SYNC,
+ FLUSH_LOG_BACKGROUND,
+ FLUSH_LOG_MAX /* must be last */
+};
-static MYSQL_THDVAR_UINT(flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
+static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit,
+ rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
"Sync on transaction commit. Similar to "
"innodb_flush_log_at_trx_commit. 1: sync on commit, "
"0,2: not sync on commit",
- nullptr, nullptr, 1, 0, 2, 0);
+ nullptr, nullptr, /* default */ FLUSH_LOG_SYNC,
+ /* min */ FLUSH_LOG_NEVER,
+ /* max */ FLUSH_LOG_BACKGROUND, 0);
static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG,
"WriteOptions::disableWAL for RocksDB", nullptr,
@@ -1017,6 +1234,13 @@ static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats,
"Force to always compute memtable stats",
nullptr, nullptr, TRUE);
+static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime,
+ rocksdb_force_compute_memtable_stats_cachetime,
+ PLUGIN_VAR_RQCMDARG,
+ "Time in usecs to cache memtable estimates", nullptr,
+ nullptr, /* default */ 60 * 1000 * 1000,
+ /* min */ 0, /* max */ INT_MAX, 0);
+
static MYSQL_SYSVAR_BOOL(
debug_optimizer_no_zero_cardinality,
rocksdb_debug_optimizer_no_zero_cardinality, PLUGIN_VAR_RQCMDARG,
@@ -1043,6 +1267,64 @@ static MYSQL_SYSVAR_BOOL(pause_background_work, rocksdb_pause_background_work,
"Disable all rocksdb background operations", nullptr,
rocksdb_set_pause_background_work, FALSE);
+static MYSQL_SYSVAR_BOOL(
+ enable_ttl, rocksdb_enable_ttl, PLUGIN_VAR_RQCMDARG,
+ "Enable expired TTL records to be dropped during compaction.", nullptr,
+ nullptr, TRUE);
+
+static MYSQL_SYSVAR_BOOL(
+ enable_ttl_read_filtering, rocksdb_enable_ttl_read_filtering,
+ PLUGIN_VAR_RQCMDARG,
+ "For tables with TTL, expired records are skipped/filtered out during "
+ "processing and in query results. Disabling this will allow these records "
+ "to be seen, but as a result rows may disappear in the middle of "
+ "transactions as they are dropped during compaction. Use with caution.",
+ nullptr, nullptr, TRUE);
+
+static MYSQL_SYSVAR_INT(
+ debug_ttl_rec_ts, rocksdb_debug_ttl_rec_ts, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. Overrides the TTL of records to "
+ "now() + debug_ttl_rec_ts. The value can be +/- to simulate "
+ "a record inserted in the past vs a record inserted in the 'future'. "
+ "A value of 0 denotes that the variable is not set. This variable is a "
+ "no-op in non-debug builds.",
+ nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
+
+static MYSQL_SYSVAR_INT(
+ debug_ttl_snapshot_ts, rocksdb_debug_ttl_snapshot_ts, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. Sets the snapshot during compaction to "
+ "now() + debug_set_ttl_snapshot_ts. The value can be +/- to simulate "
+ "a snapshot in the past vs a snapshot created in the 'future'. "
+ "A value of 0 denotes that the variable is not set. This variable is a "
+ "no-op in non-debug builds.",
+ nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
+
+static MYSQL_SYSVAR_INT(
+ debug_ttl_read_filter_ts, rocksdb_debug_ttl_read_filter_ts,
+ PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. Overrides the TTL read filtering time to "
+ "time + debug_ttl_read_filter_ts. A value of 0 denotes that the variable "
+ "is not set. This variable is a no-op in non-debug builds.",
+ nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0);
+
+static MYSQL_SYSVAR_BOOL(
+ debug_ttl_ignore_pk, rocksdb_debug_ttl_ignore_pk, PLUGIN_VAR_RQCMDARG,
+ "For debugging purposes only. If true, compaction filtering will not occur "
+ "on PK TTL data. This variable is a no-op in non-debug builds.",
+ nullptr, nullptr, FALSE);
+
+static MYSQL_SYSVAR_BOOL(
+ reset_stats, rocksdb_reset_stats, PLUGIN_VAR_RQCMDARG,
+ "Reset the RocksDB internal statistics without restarting the DB.", nullptr,
+ rocksdb_set_reset_stats, FALSE);
+
+static MYSQL_SYSVAR_UINT(io_write_timeout, rocksdb_io_write_timeout_secs,
+ PLUGIN_VAR_RQCMDARG,
+ "Timeout for experimental I/O watchdog.", nullptr,
+ rocksdb_set_io_write_timeout, /* default */ 0,
+ /* min */ 0L,
+ /* max */ UINT_MAX, 0);
+
static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG,
"Enable two phase commit for MyRocks", nullptr,
nullptr, TRUE);
@@ -1071,6 +1353,13 @@ static MYSQL_SYSVAR_BOOL(
rocksdb_force_flush_memtable_now, rocksdb_force_flush_memtable_now_stub,
FALSE);
+static MYSQL_SYSVAR_BOOL(
+ force_flush_memtable_and_lzero_now,
+ rocksdb_force_flush_memtable_and_lzero_now_var, PLUGIN_VAR_RQCMDARG,
+ "Acts similar to force_flush_memtable_now, but also compacts all L0 files.",
+ rocksdb_force_flush_memtable_and_lzero_now,
+ rocksdb_force_flush_memtable_and_lzero_now_stub, FALSE);
+
static MYSQL_THDVAR_BOOL(
flush_memtable_on_analyze, PLUGIN_VAR_RQCMDARG,
"Forces memtable flush on ANALZYE table to get accurate cardinality",
@@ -1116,7 +1405,6 @@ static MYSQL_SYSVAR_BOOL(
"Counting SingleDelete as rocksdb_compaction_sequential_deletes", nullptr,
nullptr, rocksdb_compaction_sequential_deletes_count_sd);
-
static MYSQL_SYSVAR_BOOL(
print_snapshot_conflict_queries, rocksdb_print_snapshot_conflict_queries,
PLUGIN_VAR_RQCMDARG,
@@ -1187,14 +1475,23 @@ static MYSQL_SYSVAR_UINT(
RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0,
/* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0);
+static MYSQL_SYSVAR_BOOL(
+ large_prefix, rocksdb_large_prefix, PLUGIN_VAR_RQCMDARG,
+ "Support large index prefix length of 3072 bytes. If off, the maximum "
+ "index prefix length is 767.",
+ nullptr, nullptr, FALSE);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(lock_wait_timeout),
MYSQL_SYSVAR(deadlock_detect),
+ MYSQL_SYSVAR(deadlock_detect_depth),
MYSQL_SYSVAR(max_row_locks),
+ MYSQL_SYSVAR(write_batch_max_bytes),
MYSQL_SYSVAR(lock_scanned_rows),
MYSQL_SYSVAR(bulk_load),
+ MYSQL_SYSVAR(bulk_load_allow_unsorted),
MYSQL_SYSVAR(skip_unique_check_tables),
MYSQL_SYSVAR(trace_sst_api),
MYSQL_SYSVAR(commit_in_the_middle),
@@ -1205,14 +1502,19 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(enable_bulk_load_api),
MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(merge_combine_read_size),
+ MYSQL_SYSVAR(merge_tmp_file_removal_delay_ms),
MYSQL_SYSVAR(skip_bloom_filter_on_read),
MYSQL_SYSVAR(create_if_missing),
+ MYSQL_SYSVAR(concurrent_prepare),
+ MYSQL_SYSVAR(manual_wal_flush),
MYSQL_SYSVAR(create_missing_column_families),
MYSQL_SYSVAR(error_if_exists),
MYSQL_SYSVAR(paranoid_checks),
MYSQL_SYSVAR(rate_limiter_bytes_per_sec),
+ MYSQL_SYSVAR(sst_mgr_rate_bytes_per_sec),
MYSQL_SYSVAR(delayed_write_rate),
+ MYSQL_SYSVAR(max_latest_deadlocks),
MYSQL_SYSVAR(info_log_level),
MYSQL_SYSVAR(max_open_files),
MYSQL_SYSVAR(max_total_wal_size),
@@ -1221,9 +1523,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(persistent_cache_path),
MYSQL_SYSVAR(persistent_cache_size_mb),
MYSQL_SYSVAR(delete_obsolete_files_period_micros),
- MYSQL_SYSVAR(base_background_compactions),
- MYSQL_SYSVAR(max_background_compactions),
- MYSQL_SYSVAR(max_background_flushes),
+ MYSQL_SYSVAR(max_background_jobs),
MYSQL_SYSVAR(max_log_file_size),
MYSQL_SYSVAR(max_subcompactions),
MYSQL_SYSVAR(log_file_time_to_roll),
@@ -1234,7 +1534,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(wal_size_limit_mb),
MYSQL_SYSVAR(manifest_preallocation_size),
MYSQL_SYSVAR(use_direct_reads),
- MYSQL_SYSVAR(use_direct_writes),
+ MYSQL_SYSVAR(use_direct_io_for_flush_and_compaction),
MYSQL_SYSVAR(allow_mmap_reads),
MYSQL_SYSVAR(allow_mmap_writes),
MYSQL_SYSVAR(is_fd_close_on_exec),
@@ -1254,6 +1554,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(enable_write_thread_adaptive_yield),
MYSQL_SYSVAR(block_cache_size),
+ MYSQL_SYSVAR(sim_cache_size),
+ MYSQL_SYSVAR(use_clock_cache),
MYSQL_SYSVAR(cache_index_and_filter_blocks),
MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache),
MYSQL_SYSVAR(index_type),
@@ -1266,8 +1568,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(default_cf_options),
MYSQL_SYSVAR(override_cf_options),
-
- MYSQL_SYSVAR(background_sync),
+ MYSQL_SYSVAR(update_cf_options),
MYSQL_SYSVAR(flush_log_at_trx_commit),
MYSQL_SYSVAR(write_disable_wal),
@@ -1280,6 +1581,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(force_index_records_in_range),
MYSQL_SYSVAR(debug_optimizer_n_rows),
MYSQL_SYSVAR(force_compute_memtable_stats),
+ MYSQL_SYSVAR(force_compute_memtable_stats_cachetime),
MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality),
MYSQL_SYSVAR(compact_cf),
@@ -1290,6 +1592,15 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(strict_collation_exceptions),
MYSQL_SYSVAR(collect_sst_properties),
MYSQL_SYSVAR(force_flush_memtable_now),
+ MYSQL_SYSVAR(force_flush_memtable_and_lzero_now),
+ MYSQL_SYSVAR(enable_ttl),
+ MYSQL_SYSVAR(enable_ttl_read_filtering),
+ MYSQL_SYSVAR(debug_ttl_rec_ts),
+ MYSQL_SYSVAR(debug_ttl_snapshot_ts),
+ MYSQL_SYSVAR(debug_ttl_read_filter_ts),
+ MYSQL_SYSVAR(debug_ttl_ignore_pk),
+ MYSQL_SYSVAR(reset_stats),
+ MYSQL_SYSVAR(io_write_timeout),
MYSQL_SYSVAR(flush_memtable_on_analyze),
MYSQL_SYSVAR(seconds_between_stat_computes),
@@ -1310,13 +1621,15 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(validate_tables),
MYSQL_SYSVAR(table_stats_sampling_pct),
+
+ MYSQL_SYSVAR(large_prefix),
nullptr};
static rocksdb::WriteOptions
rdb_get_rocksdb_write_options(my_core::THD *const thd) {
rocksdb::WriteOptions opt;
- opt.sync = THDVAR(thd, flush_log_at_trx_commit) == 1;
+ opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
opt.disableWAL = THDVAR(thd, write_disable_wal);
opt.ignore_missing_column_families =
THDVAR(thd, write_ignore_missing_column_families);
@@ -1433,6 +1746,9 @@ String timeout_message(const char *command, const char *name1,
class Rdb_transaction {
protected:
ulonglong m_write_count = 0;
+ ulonglong m_insert_count = 0;
+ ulonglong m_update_count = 0;
+ ulonglong m_delete_count = 0;
ulonglong m_lock_count = 0;
bool m_is_delayed_snapshot = false;
@@ -1476,6 +1792,12 @@ public:
bool m_ddl_transaction;
/*
+ Tracks the number of tables in use through external_lock.
+ This should not be reset during start_tx().
+ */
+ int64_t m_n_mysql_tables_in_use = 0;
+
+ /*
for distinction between rdb_transaction_impl and rdb_writebatch_impl
when using walk tx list
*/
@@ -1502,7 +1824,8 @@ public:
}
int set_status_error(THD *const thd, const rocksdb::Status &s,
- const Rdb_key_def &kd, Rdb_tbl_def *const tbl_def) {
+ const Rdb_key_def &kd, Rdb_tbl_def *const tbl_def,
+ Rdb_table_handler *const table_handler) {
DBUG_ASSERT(!s.ok());
DBUG_ASSERT(tbl_def != nullptr);
@@ -1518,6 +1841,7 @@ public:
my_core::thd_mark_transaction_to_rollback(thd, false /*just statement*/);
m_detailed_error.copy(timeout_message(
"index", tbl_def->full_tablename().c_str(), kd.get_name().c_str()));
+ table_handler->m_lock_wait_timeout_counter.inc();
return HA_ERR_LOCK_WAIT_TIMEOUT;
}
@@ -1525,6 +1849,8 @@ public:
if (s.IsDeadlock()) {
my_core::thd_mark_transaction_to_rollback(thd,
false /* just statement */);
+ m_detailed_error = String();
+ table_handler->m_deadlock_counter.inc();
return HA_ERR_LOCK_DEADLOCK;
} else if (s.IsBusy()) {
rocksdb_snapshot_conflict_errors++;
@@ -1536,18 +1862,16 @@ public:
"Query: %s",
user_host_buff, thd->query());
}
+ m_detailed_error = String(" (snapshot conflict)", system_charset_info);
+ table_handler->m_deadlock_counter.inc();
return HA_ERR_LOCK_DEADLOCK;
}
- if (s.IsLockLimit()) {
- return HA_ERR_ROCKSDB_TOO_MANY_LOCKS;
- }
-
if (s.IsIOError() || s.IsCorruption()) {
rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
}
- my_error(ER_INTERNAL_ERROR, MYF(0), s.ToString().c_str());
- return HA_ERR_INTERNAL_ERROR;
+
+ return ha_rocksdb::rdb_error_to_mysql(s);
}
THD *get_thd() const { return m_thd; }
@@ -1588,6 +1912,13 @@ public:
}
}
+ void update_bytes_written(ulonglong bytes_written) {
+ if (m_tbl_io_perf != nullptr) {
+ m_tbl_io_perf->update_bytes_written(rocksdb_perf_context_level(m_thd),
+ bytes_written);
+ }
+ }
+
void set_params(int timeout_sec_arg, int max_row_locks_arg) {
m_timeout_sec = timeout_sec_arg;
m_max_row_locks = max_row_locks_arg;
@@ -1598,6 +1929,18 @@ public:
ulonglong get_write_count() const { return m_write_count; }
+ ulonglong get_insert_count() const { return m_insert_count; }
+
+ ulonglong get_update_count() const { return m_update_count; }
+
+ ulonglong get_delete_count() const { return m_delete_count; }
+
+ void incr_insert_count() { ++m_insert_count; }
+
+ void incr_update_count() { ++m_update_count; }
+
+ void incr_delete_count() { ++m_delete_count; }
+
int get_timeout_sec() const { return m_timeout_sec; }
ulonglong get_lock_count() const { return m_lock_count; }
@@ -1633,7 +1976,7 @@ public:
In both cases, rolling back transaction is safe. Nothing is written to
binlog.
*/
- my_printf_error(ER_UNKNOWN_ERROR, ERRSTR_ROLLBACK_ONLY, MYF(0));
+ my_error(ER_ROLLBACK_ONLY, MYF(0));
rollback();
return true;
} else {
@@ -1812,7 +2155,7 @@ public:
bool can_prepare() const {
if (m_rollback_only) {
- my_printf_error(ER_UNKNOWN_ERROR, ERRSTR_ROLLBACK_ONLY, MYF(0));
+ my_error(ER_ROLLBACK_ONLY, MYF(0));
return false;
}
return true;
@@ -1820,10 +2163,7 @@ public:
int rollback_to_savepoint(void *const savepoint) {
if (has_modifications()) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "MyRocks currently does not support ROLLBACK TO "
- "SAVEPOINT if modifying rows.",
- MYF(0));
+ my_error(ER_ROLLBACK_TO_SAVEPOINT, MYF(0));
m_rollback_only = true;
return HA_EXIT_FAILURE;
}
@@ -1926,6 +2266,9 @@ private:
release_tx();
m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
m_lock_count = 0;
set_tx_read_only(false);
m_rollback_only = false;
@@ -1935,6 +2278,9 @@ private:
public:
void rollback() override {
m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
m_lock_count = 0;
m_ddl_transaction = false;
if (m_rocksdb_tx) {
@@ -2038,6 +2384,7 @@ public:
rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family,
const rocksdb::Slice &key,
std::string *value) const override {
+ global_stats.queries[QUERIES_POINT].inc();
return m_rocksdb_tx->Get(m_read_opts, column_family, key, value);
}
@@ -2055,6 +2402,7 @@ public:
rocksdb::Iterator *
get_iterator(const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *const column_family) override {
+ global_stats.queries[QUERIES_RANGE].inc();
return m_rocksdb_tx->GetIterator(options, column_family);
}
@@ -2068,8 +2416,10 @@ public:
tx_opts.set_snapshot = false;
tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec);
tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect);
+ tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth);
+ tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes);
- write_opts.sync = THDVAR(m_thd, flush_log_at_trx_commit) == 1;
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
@@ -2180,6 +2530,9 @@ private:
reset();
m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
set_tx_read_only(false);
m_rollback_only = false;
return res;
@@ -2201,6 +2554,9 @@ public:
void rollback() override {
m_write_count = 0;
+ m_insert_count = 0;
+ m_update_count = 0;
+ m_delete_count = 0;
m_lock_count = 0;
release_snapshot();
@@ -2282,7 +2638,7 @@ public:
void start_tx() override {
reset();
- write_opts.sync = THDVAR(m_thd, flush_log_at_trx_commit) == 1;
+ write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
@@ -2326,14 +2682,21 @@ namespace {
class Rdb_perf_context_guard {
Rdb_io_perf m_io_perf;
- THD *m_thd;
+ Rdb_io_perf *m_io_perf_ptr;
+ Rdb_transaction *m_tx;
+ uint m_level;
-public:
+ public:
Rdb_perf_context_guard(const Rdb_perf_context_guard &) = delete;
Rdb_perf_context_guard &operator=(const Rdb_perf_context_guard &) = delete;
- explicit Rdb_perf_context_guard(THD *const thd) : m_thd(thd) {
- Rdb_transaction *&tx = get_tx_from_thd(m_thd);
+ explicit Rdb_perf_context_guard(Rdb_io_perf *io_perf, uint level)
+ : m_io_perf_ptr(io_perf), m_tx(nullptr), m_level(level) {
+ m_io_perf_ptr->start(m_level);
+ }
+
+ explicit Rdb_perf_context_guard(Rdb_transaction *tx, uint level)
+ : m_io_perf_ptr(nullptr), m_tx(tx), m_level(level) {
/*
if perf_context information is already being recorded, this becomes a
no-op
@@ -2344,9 +2707,10 @@ public:
}
~Rdb_perf_context_guard() {
- Rdb_transaction *&tx = get_tx_from_thd(m_thd);
- if (tx != nullptr) {
- tx->io_perf_end_and_record();
+ if (m_tx != nullptr) {
+ m_tx->io_perf_end_and_record();
+ } else if (m_io_perf_ptr != nullptr) {
+ m_io_perf_ptr->end_and_record(m_level);
}
}
};
@@ -2436,9 +2800,19 @@ static std::string rdb_xid_to_string(const XID &src) {
*/
static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__)))
DBUG_ASSERT(rdb != nullptr);
- rocksdb_wal_group_syncs++;
- const rocksdb::Status s = rdb->SyncWAL();
+
+ rocksdb::Status s;
+ /*
+ target_lsn is set to 0 when MySQL wants to sync the wal files
+ */
+ if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+ rocksdb_wal_group_syncs++;
+ s = rdb->FlushWAL(target_lsn == 0 ||
+ rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
+ }
+
if (!s.ok()) {
+ rdb_log_status_error(s);
return HA_EXIT_FAILURE;
}
return HA_EXIT_SUCCESS;
@@ -2481,8 +2855,9 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx)
if (!tx->prepare(rdb_xid_to_string(xid))) {
return HA_EXIT_FAILURE;
}
- if (thd->durability_property == HA_IGNORE_DURABILITY
-#ifdef MARIAROCKS_NOT_YET
+ if (thd->durability_property == HA_IGNORE_DURABILITY )
+#ifdef MARIAROCKS_NOT_YET
+ (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER)) {
&&
THDVAR(thd, flush_log_at_trx_commit)) {
#endif
@@ -2510,33 +2885,65 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx)
this is needed to avoid crashes in XA scenarios
*/
static int rocksdb_commit_by_xid(handlerton *const hton, XID *const xid) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(hton != nullptr);
+ DBUG_ASSERT(xid != nullptr);
+ DBUG_ASSERT(commit_latency_stats != nullptr);
+
+ rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
+
const auto name = rdb_xid_to_string(*xid);
+ DBUG_ASSERT(!name.empty());
+
rocksdb::Transaction *const trx = rdb->GetTransactionByName(name);
+
if (trx == nullptr) {
- return HA_EXIT_FAILURE;
+ DBUG_RETURN(HA_EXIT_FAILURE);
}
+
const rocksdb::Status s = trx->Commit();
+
if (!s.ok()) {
- return HA_EXIT_FAILURE;
+ rdb_log_status_error(s);
+ DBUG_RETURN(HA_EXIT_FAILURE);
}
+
delete trx;
- return HA_EXIT_SUCCESS;
+
+ // `Add()` is implemented in a thread-safe manner.
+ commit_latency_stats->Add(timer.ElapsedNanos() / 1000);
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
}
static int
rocksdb_rollback_by_xid(handlerton *const hton MY_ATTRIBUTE((__unused__)),
XID *const xid) {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(hton != nullptr);
+ DBUG_ASSERT(xid != nullptr);
+ DBUG_ASSERT(rdb != nullptr);
+
const auto name = rdb_xid_to_string(*xid);
+
rocksdb::Transaction *const trx = rdb->GetTransactionByName(name);
+
if (trx == nullptr) {
- return HA_EXIT_FAILURE;
+ DBUG_RETURN(HA_EXIT_FAILURE);
}
+
const rocksdb::Status s = trx->Rollback();
+
if (!s.ok()) {
- return HA_EXIT_FAILURE;
+ rdb_log_status_error(s);
+ DBUG_RETURN(HA_EXIT_FAILURE);
}
+
delete trx;
- return HA_EXIT_SUCCESS;
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
}
/**
@@ -2621,13 +3028,16 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
DBUG_ASSERT(hton != nullptr);
DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(commit_latency_stats != nullptr);
- /* this will trigger saving of perf_context information */
- Rdb_perf_context_guard guard(thd);
+ rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true);
/* note: h->external_lock(F_UNLCK) is called after this function is called) */
Rdb_transaction *&tx = get_tx_from_thd(thd);
+ /* this will trigger saving of perf_context information */
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
if (tx != nullptr) {
if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT |
OPTION_BEGIN))) {
@@ -2636,8 +3046,9 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
- For a COMMIT statement that finishes a multi-statement transaction
- For a statement that has its own transaction
*/
- if (tx->commit())
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ if (tx->commit()) {
+ DBUG_RETURN(HA_ERR_ROCKSDB_COMMIT_FAILED);
+ }
} else {
/*
We get here when committing a statement within a transaction.
@@ -2655,13 +3066,16 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx)
}
}
+ // `Add()` is implemented in a thread-safe manner.
+ commit_latency_stats->Add(timer.ElapsedNanos() / 1000);
+
DBUG_RETURN(HA_EXIT_SUCCESS);
}
static int rocksdb_rollback(handlerton *const hton, THD *const thd,
bool rollback_tx) {
- Rdb_perf_context_guard guard(thd);
Rdb_transaction *&tx = get_tx_from_thd(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
if (tx != nullptr) {
if (rollback_tx) {
@@ -2784,7 +3198,82 @@ private:
"=========================================\n";
}
-public:
+ static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn,
+ const GL_INDEX_ID &gl_index_id,
+ bool is_last_path = false) {
+ std::string txn_data;
+
+ /* extract table name and index names using the index id */
+ std::string table_name = ddl_manager.safe_get_table_name(gl_index_id);
+ if (table_name.empty()) {
+ table_name =
+ "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+ }
+ auto kd = ddl_manager.safe_find(gl_index_id);
+ std::string idx_name =
+ (kd) ? kd->get_name()
+ : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id);
+
+ /* get the name of the column family */
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id);
+ std::string cf_name = cfh->GetName();
+
+ txn_data += format_string(
+ "TRANSACTIONID: %u\n"
+ "COLUMN FAMILY NAME: %s\n"
+ "WAITING KEY: %s\n"
+ "LOCK TYPE: %s\n"
+ "INDEX NAME: %s\n"
+ "TABLE NAME: %s\n",
+ txn.m_txn_id, cf_name.c_str(),
+ rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length())
+ .c_str(),
+ txn.m_exclusive ? "EXCLUSIVE" : "SHARED", idx_name.c_str(),
+ table_name.c_str());
+ if (!is_last_path) {
+ txn_data += "---------------WAITING FOR---------------\n";
+ }
+ return txn_data;
+ }
+
+ static std::string
+ get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) {
+ std::string path_data;
+ if (path_entry.limit_exceeded) {
+ path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n";
+ } else {
+ path_data += "\n*** DEADLOCK PATH\n"
+ "=========================================\n";
+ for (auto it = path_entry.path.begin(); it != path_entry.path.end();
+ it++) {
+ auto txn = *it;
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ path_data += get_dlock_txn_info(txn, gl_index_id);
+ }
+
+ DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty());
+ /* print the first txn in the path to display the full deadlock cycle */
+ if (!path_entry.path.empty() && !path_entry.limit_exceeded) {
+ auto txn = path_entry.path[0];
+ const GL_INDEX_ID gl_index_id = {
+ txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(
+ txn.m_waiting_key.c_str()))};
+ path_data += get_dlock_txn_info(txn, gl_index_id, true);
+
+ /* prints the txn id of the transaction that caused the deadlock */
+ auto deadlocking_txn = *(path_entry.path.end() - 1);
+ path_data +=
+ format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n",
+ deadlocking_txn.m_txn_id);
+ }
+ }
+
+ return path_data;
+ }
+
+ public:
Rdb_snapshot_status() : m_data(get_header()) {}
std::string getResult() { return m_data + get_footer(); }
@@ -2804,12 +3293,23 @@ public:
#ifdef MARIAROCKS_NOT_YET
thd_security_context(tx->get_thd(), buffer, sizeof buffer, 0);
#endif
- m_data += format_string("---SNAPSHOT, ACTIVE %lld sec\n"
+ m_data += format_string(
+ "---SNAPSHOT, ACTIVE %lld sec\n"
"%s\n"
- "lock count %llu, write count %llu\n",
- (longlong)(curr_time - snapshot_timestamp),
- buffer,
- tx->get_lock_count(), tx->get_write_count());
+ "lock count %llu, write count %llu\n"
+ "insert count %llu, update count %llu, delete count %llu\n",
+ (longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(),
+ tx->get_write_count(), tx->get_insert_count(), tx->get_update_count(),
+ tx->get_delete_count());
+ }
+ }
+
+ void populate_deadlock_buffer() {
+ auto dlock_buffer = rdb->GetDeadlockInfoBuffer();
+ m_data += "----------LATEST DETECTED DEADLOCKS----------\n";
+
+ for (auto path_entry : dlock_buffer) {
+ m_data += get_dlock_path_info(path_entry);
}
}
};
@@ -2913,27 +3413,75 @@ static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd,
Rdb_snapshot_status showStatus;
Rdb_transaction::walk_tx_list(&showStatus);
+ showStatus.populate_deadlock_buffer();
/* Send the result data back to MySQL */
- return print_stats(thd, "SNAPSHOTS", "rocksdb", showStatus.getResult(),
- stat_print);
+ return print_stats(thd, "rocksdb", "", showStatus.getResult(), stat_print);
}
#endif
/*
- This is called for SHOW ENGINE ROCKSDB STATUS|LOGS|etc.
+ This is called for SHOW ENGINE ROCKSDB STATUS | LOGS | etc.
For now, produce info about live files (which gives an imprecise idea about
- what column families are there)
+ what column families are there).
*/
-
static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
stat_print_fn *const stat_print,
enum ha_stat_type stat_type) {
+ DBUG_ASSERT(hton != nullptr);
+ DBUG_ASSERT(thd != nullptr);
+ DBUG_ASSERT(stat_print != nullptr);
+
bool res = false;
+ char buf[100] = {'\0'};
+
if (stat_type == HA_ENGINE_STATUS) {
+ DBUG_ASSERT(rdb != nullptr);
+
std::string str;
+ /* Global DB Statistics */
+ if (rocksdb_stats) {
+ str = rocksdb_stats->ToString();
+
+ // Use the same format as internal RocksDB statistics entries to make
+ // sure that output will look unified.
+ DBUG_ASSERT(commit_latency_stats != nullptr);
+
+ snprintf(buf, sizeof(buf), "rocksdb.commit_latency statistics "
+ "Percentiles :=> 50 : %.2f 95 : %.2f "
+ "99 : %.2f 100 : %.2f\n",
+ commit_latency_stats->Percentile(50),
+ commit_latency_stats->Percentile(95),
+ commit_latency_stats->Percentile(99),
+ commit_latency_stats->Percentile(100));
+ str.append(buf);
+
+ uint64_t v = 0;
+
+ // Retrieve additional stalling related numbers from RocksDB and append
+ // them to the buffer meant for displaying detailed statistics. The intent
+ // here is to avoid adding another row to the query output because of
+ // just two numbers.
+ //
+ // NB! We're replacing hyphens with underscores in output to better match
+ // the existing naming convention.
+ if (rdb->GetIntProperty("rocksdb.is-write-stopped", &v)) {
+ snprintf(buf, sizeof(buf), "rocksdb.is_write_stopped COUNT : %lu\n", v);
+ str.append(buf);
+ }
+
+ if (rdb->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)) {
+ snprintf(buf, sizeof(buf), "rocksdb.actual_delayed_write_rate "
+ "COUNT : %lu\n",
+ v);
+ str.append(buf);
+ }
+
+ res |= print_stats(thd, "STATISTICS", "rocksdb", str, stat_print);
+ }
+
/* Per DB stats */
if (rdb->GetProperty("rocksdb.dbstats", &str)) {
res |= print_stats(thd, "DBSTATS", "rocksdb", str, stat_print);
@@ -2941,19 +3489,14 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
/* Per column family stats */
for (const auto &cf_name : cf_manager.get_cf_names()) {
- rocksdb::ColumnFamilyHandle *cfh;
- bool is_automatic;
-
- /*
- Only the cf name is important. Whether it was generated automatically
- does not matter, so is_automatic is ignored.
- */
- cfh = cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic);
- if (cfh == nullptr)
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
continue;
+ }
- if (!rdb->GetProperty(cfh, "rocksdb.cfstats", &str))
+ if (!rdb->GetProperty(cfh, "rocksdb.cfstats", &str)) {
continue;
+ }
res |= print_stats(thd, "CF_COMPACTION", cf_name, str, stat_print);
}
@@ -2963,20 +3506,23 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
std::unordered_set<const rocksdb::Cache *> cache_set;
size_t internal_cache_count = 0;
size_t kDefaultInternalCacheSize = 8 * 1024 * 1024;
- char buf[100];
dbs.push_back(rdb);
- cache_set.insert(rocksdb_tbl_options.block_cache.get());
+ cache_set.insert(rocksdb_tbl_options->block_cache.get());
+
for (const auto &cf_handle : cf_manager.get_all_cf()) {
rocksdb::ColumnFamilyDescriptor cf_desc;
cf_handle->GetDescriptor(&cf_desc);
auto *const table_factory = cf_desc.options.table_factory.get();
+
if (table_factory != nullptr) {
std::string tf_name = table_factory->Name();
+
if (tf_name.find("BlockBasedTable") != std::string::npos) {
const rocksdb::BlockBasedTableOptions *const bbt_opt =
reinterpret_cast<rocksdb::BlockBasedTableOptions *>(
table_factory->GetOptions());
+
if (bbt_opt != nullptr) {
if (bbt_opt->block_cache.get() != nullptr) {
cache_set.insert(bbt_opt->block_cache.get());
@@ -3008,8 +3554,44 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd,
snprintf(buf, sizeof(buf), "\nDefault Cache Capacity: %llu",
(ulonglong)internal_cache_count * kDefaultInternalCacheSize);
str.append(buf);
- res |= print_stats(thd, "Memory_Stats", "rocksdb", str, stat_print);
+ res |= print_stats(thd, "MEMORY_STATS", "rocksdb", str, stat_print);
#ifdef MARIAROCKS_NOT_YET
+ /* Show the background thread status */
+ std::vector<rocksdb::ThreadStatus> thread_list;
+ rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list);
+
+ if (!s.ok()) {
+ sql_print_error("RocksDB: Returned error (%s) from GetThreadList.\n",
+ s.ToString().c_str());
+ res |= true;
+ } else {
+ /* For each background thread retrieved, print out its information */
+ for (auto &it : thread_list) {
+ /* Only look at background threads. Ignore user threads, if any. */
+ if (it.thread_type > rocksdb::ThreadStatus::LOW_PRIORITY) {
+ continue;
+ }
+
+ str = "\nthread_type: " + it.GetThreadTypeName(it.thread_type) +
+ "\ncf_name: " + it.cf_name +
+ "\noperation_type: " + it.GetOperationName(it.operation_type) +
+ "\noperation_stage: " +
+ it.GetOperationStageName(it.operation_stage) +
+ "\nelapsed_time_ms: " +
+ it.MicrosToString(it.op_elapsed_micros);
+
+ for (auto &it_props :
+ it.InterpretOperationProperties(it.operation_type,
+ it.op_properties)) {
+ str += "\n" + it_props.first + ": " + std::to_string(it_props.second);
+ }
+
+ str += "\nstate_type: " + it.GetStateName(it.state_type);
+
+ res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id),
+ str, stat_print);
+ }
+ }
} else if (stat_type == HA_ENGINE_TRX) {
/* Handle the SHOW ENGINE ROCKSDB TRANSACTION STATUS command */
res |= rocksdb_show_snapshot_status(hton, thd, stat_print);
@@ -3058,16 +3640,10 @@ static int rocksdb_start_tx_and_assign_read_view(
user for whom the transaction should
be committed */
{
- Rdb_perf_context_guard guard(thd);
-
ulong const tx_isolation = my_core::thd_tx_isolation(thd);
if (tx_isolation != ISO_REPEATABLE_READ) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "Only REPEATABLE READ isolation level is supported "
- "for START TRANSACTION WITH CONSISTENT SNAPSHOT "
- "in RocksDB Storage Engine.",
- MYF(0));
+ my_error(ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT, MYF(0));
return HA_EXIT_FAILURE;
}
/*
@@ -3082,6 +3658,8 @@ static int rocksdb_start_tx_and_assign_read_view(
mysql_mutex_assert_owner(&LOCK_commit_ordered);
Rdb_transaction *const tx = get_or_create_tx(thd);
+ Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
DBUG_ASSERT(!tx->has_snapshot());
tx->set_tx_read_only(true);
rocksdb_register_tx(hton, thd, tx);
@@ -3122,11 +3700,15 @@ static void rocksdb_update_table_stats(
my_io_perf_t *r, my_io_perf_t *w, my_io_perf_t *r_blob,
my_io_perf_t *r_primary, my_io_perf_t *r_secondary,
page_stats_t *page_stats, comp_stats_t *comp_stats,
- int n_lock_wait, int n_lock_wait_timeout, const char *engine)) {
+ int n_lock_wait, int n_lock_wait_timeout, int n_lock_deadlock,
+ const char *engine)) {
my_io_perf_t io_perf_read;
+ my_io_perf_t io_perf_write;
my_io_perf_t io_perf;
page_stats_t page_stats;
comp_stats_t comp_stats;
+ uint lock_wait_timeout_stats;
+ uint deadlock_stats;
std::vector<std::string> tablenames;
/*
@@ -3136,6 +3718,7 @@ static void rocksdb_update_table_stats(
memset(&io_perf, 0, sizeof(io_perf));
memset(&page_stats, 0, sizeof(page_stats));
memset(&comp_stats, 0, sizeof(comp_stats));
+ memset(&io_perf_write, 0, sizeof(io_perf_write));
tablenames = rdb_open_tables.get_table_names();
@@ -3146,7 +3729,7 @@ static void rocksdb_update_table_stats(
char tablename_sys[NAME_LEN + 1];
bool is_partition;
- if (rdb_normalize_tablename(it, &str)) {
+ if (rdb_normalize_tablename(it, &str) != HA_EXIT_SUCCESS) {
/* Function needs to return void because of the interface and we've
* detected an error which shouldn't happen. There's no way to let
* caller know that something failed.
@@ -3168,6 +3751,10 @@ static void rocksdb_update_table_stats(
io_perf_read.bytes = table_handler->m_io_perf_read.bytes.load();
io_perf_read.requests = table_handler->m_io_perf_read.requests.load();
+ io_perf_write.bytes = table_handler->m_io_perf_write.bytes.load();
+ io_perf_write.requests = table_handler->m_io_perf_write.requests.load();
+ lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load();
+ deadlock_stats = table_handler->m_deadlock_counter.load();
/*
Convert from rocksdb timer to mysql timer. RocksDB values are
@@ -3193,8 +3780,9 @@ static void rocksdb_update_table_stats(
sizeof(dbname_sys));
my_core::filename_to_tablename(tablename.c_str(), tablename_sys,
sizeof(tablename_sys));
- (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read, &io_perf,
- &io_perf, &io_perf, &io_perf, &page_stats, &comp_stats, 0, 0,
+ (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read,
+ &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats,
+ &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats,
rocksdb_hton_name);
}
}
@@ -3322,37 +3910,37 @@ static int rocksdb_init_func(void *const p) {
DBUG_ASSERT(!mysqld_embedded);
rocksdb_stats = rocksdb::CreateDBStatistics();
- rocksdb_db_options.statistics = rocksdb_stats;
+ rocksdb_db_options->statistics = rocksdb_stats;
if (rocksdb_rate_limiter_bytes_per_sec != 0) {
rocksdb_rate_limiter.reset(
rocksdb::NewGenericRateLimiter(rocksdb_rate_limiter_bytes_per_sec));
- rocksdb_db_options.rate_limiter = rocksdb_rate_limiter;
+ rocksdb_db_options->rate_limiter = rocksdb_rate_limiter;
}
- rocksdb_db_options.delayed_write_rate = rocksdb_delayed_write_rate;
+ rocksdb_db_options->delayed_write_rate = rocksdb_delayed_write_rate;
std::shared_ptr<Rdb_logger> myrocks_logger = std::make_shared<Rdb_logger>();
rocksdb::Status s = rocksdb::CreateLoggerFromOptions(
- rocksdb_datadir, rocksdb_db_options, &rocksdb_db_options.info_log);
+ rocksdb_datadir, *rocksdb_db_options, &rocksdb_db_options->info_log);
if (s.ok()) {
- myrocks_logger->SetRocksDBLogger(rocksdb_db_options.info_log);
+ myrocks_logger->SetRocksDBLogger(rocksdb_db_options->info_log);
}
- rocksdb_db_options.info_log = myrocks_logger;
+ rocksdb_db_options->info_log = myrocks_logger;
myrocks_logger->SetInfoLogLevel(
static_cast<rocksdb::InfoLogLevel>(rocksdb_info_log_level));
- rocksdb_db_options.wal_dir = rocksdb_wal_dir;
+ rocksdb_db_options->wal_dir = rocksdb_wal_dir;
- rocksdb_db_options.wal_recovery_mode =
+ rocksdb_db_options->wal_recovery_mode =
static_cast<rocksdb::WALRecoveryMode>(rocksdb_wal_recovery_mode);
- rocksdb_db_options.access_hint_on_compaction_start =
+ rocksdb_db_options->access_hint_on_compaction_start =
static_cast<rocksdb::Options::AccessHint>(
rocksdb_access_hint_on_compaction_start);
- if (rocksdb_db_options.allow_mmap_reads &&
- rocksdb_db_options.use_direct_reads) {
+ if (rocksdb_db_options->allow_mmap_reads &&
+ rocksdb_db_options->use_direct_reads) {
// allow_mmap_reads implies !use_direct_reads and RocksDB will not open if
// mmap_reads and direct_reads are both on. (NO_LINT_DEBUG)
sql_print_error("RocksDB: Can't enable both use_direct_reads "
@@ -3360,17 +3948,27 @@ static int rocksdb_init_func(void *const p) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
- if (rocksdb_db_options.allow_mmap_writes &&
- rocksdb_db_options.use_direct_writes) {
+ if (rocksdb_db_options->allow_mmap_writes &&
+ rocksdb_db_options->use_direct_io_for_flush_and_compaction) {
// See above comment for allow_mmap_reads. (NO_LINT_DEBUG)
- sql_print_error("RocksDB: Can't enable both use_direct_writes "
- "and allow_mmap_writes\n");
+ sql_print_error("RocksDB: Can't enable both "
+ "use_direct_io_for_flush_and_compaction and "
+ "allow_mmap_writes\n");
DBUG_RETURN(HA_EXIT_FAILURE);
}
+ // sst_file_manager will move deleted rocksdb sst files to trash_dir
+ // to be deleted in a background thread.
+ std::string trash_dir = std::string(rocksdb_datadir) + "/trash";
+ rocksdb_db_options->sst_file_manager.reset(
+ NewSstFileManager(rocksdb_db_options->env, myrocks_logger, trash_dir));
+
+ rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond(
+ rocksdb_sst_mgr_rate_bytes_per_sec);
+
std::vector<std::string> cf_names;
rocksdb::Status status;
- status = rocksdb::DB::ListColumnFamilies(rocksdb_db_options, rocksdb_datadir,
+ status = rocksdb::DB::ListColumnFamilies(*rocksdb_db_options, rocksdb_datadir,
&cf_names);
if (!status.ok()) {
/*
@@ -3387,9 +3985,7 @@ static int rocksdb_init_func(void *const p) {
sql_print_information(
"RocksDB: assuming that we're creating a new database");
} else {
- std::string err_text = status.ToString();
- sql_print_error("RocksDB: Error listing column families: %s",
- err_text.c_str());
+ rdb_log_status_error(status, "Error listing column families");
DBUG_RETURN(HA_EXIT_FAILURE);
}
} else
@@ -3399,18 +3995,28 @@ static int rocksdb_init_func(void *const p) {
std::vector<rocksdb::ColumnFamilyDescriptor> cf_descr;
std::vector<rocksdb::ColumnFamilyHandle *> cf_handles;
- rocksdb_tbl_options.index_type =
+ rocksdb_tbl_options->index_type =
(rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type;
- if (!rocksdb_tbl_options.no_block_cache) {
- rocksdb_tbl_options.block_cache =
- rocksdb::NewLRUCache(rocksdb_block_cache_size);
+ if (!rocksdb_tbl_options->no_block_cache) {
+ std::shared_ptr<rocksdb::Cache> block_cache = rocksdb_use_clock_cache
+ ? rocksdb::NewClockCache(rocksdb_block_cache_size)
+ : rocksdb::NewLRUCache(rocksdb_block_cache_size);
+ if (rocksdb_sim_cache_size > 0) {
+ // Simulated cache enabled
+ // Wrap block cache inside a simulated cache and pass it to RocksDB
+ rocksdb_tbl_options->block_cache =
+ rocksdb::NewSimCache(block_cache, rocksdb_sim_cache_size, 6);
+ } else {
+ // Pass block cache to RocksDB
+ rocksdb_tbl_options->block_cache = block_cache;
+ }
}
// Using newer BlockBasedTable format version for better compression
// and better memory allocation.
// See:
// https://github.com/facebook/rocksdb/commit/9ab5adfc59a621d12357580c94451d9f7320c2dd
- rocksdb_tbl_options.format_version = 2;
+ rocksdb_tbl_options->format_version = 2;
if (rocksdb_collect_sst_properties) {
properties_collector_factory =
@@ -3434,15 +4040,16 @@ static int rocksdb_init_func(void *const p) {
rocksdb::NewPersistentCache(
rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path),
cache_size_bytes, myrocks_logger, true, &pcache);
- rocksdb_tbl_options.persistent_cache = pcache;
+ rocksdb_tbl_options->persistent_cache = pcache;
} else if (strlen(rocksdb_persistent_cache_path)) {
sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb");
- DBUG_RETURN(1);
+ DBUG_RETURN(HA_EXIT_FAILURE);
}
- if (!rocksdb_cf_options_map.init(
- rocksdb_tbl_options, properties_collector_factory,
- rocksdb_default_cf_options, rocksdb_override_cf_options)) {
+ std::unique_ptr<Rdb_cf_options> cf_options_map(new Rdb_cf_options());
+ if (!cf_options_map->init(*rocksdb_tbl_options, properties_collector_factory,
+ rocksdb_default_cf_options,
+ rocksdb_override_cf_options)) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Failed to initialize CF options map.");
DBUG_RETURN(HA_EXIT_FAILURE);
@@ -3459,7 +4066,7 @@ static int rocksdb_init_func(void *const p) {
sql_print_information("RocksDB: Column Families at start:");
for (size_t i = 0; i < cf_names.size(); ++i) {
rocksdb::ColumnFamilyOptions opts;
- rocksdb_cf_options_map.get_cf_options(cf_names[i], &opts);
+ cf_options_map->get_cf_options(cf_names[i], &opts);
sql_print_information(" cf=%s", cf_names[i].c_str());
sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size);
@@ -3477,15 +4084,11 @@ static int rocksdb_init_func(void *const p) {
cf_descr.push_back(rocksdb::ColumnFamilyDescriptor(cf_names[i], opts));
}
- rocksdb::Options main_opts(rocksdb_db_options,
- rocksdb_cf_options_map.get_defaults());
+ rocksdb::Options main_opts(*rocksdb_db_options,
+ cf_options_map->get_defaults());
#ifdef MARIAROCKS_NOT_YET
#endif
- main_opts.env->SetBackgroundThreads(main_opts.max_background_flushes,
- rocksdb::Env::Priority::HIGH);
- main_opts.env->SetBackgroundThreads(main_opts.max_background_compactions,
- rocksdb::Env::Priority::LOW);
rocksdb::TransactionDBOptions tx_db_options;
tx_db_options.transaction_lock_timeout = 2; // 2 seconds
tx_db_options.custom_mutex_factory = std::make_shared<Rdb_mutex_factory>();
@@ -3496,10 +4099,8 @@ static int rocksdb_init_func(void *const p) {
// We won't start if we'll determine that there's a chance of data corruption
// because of incompatible options.
if (!status.ok()) {
- // NO_LINT_DEBUG
- sql_print_error("RocksDB: compatibility check against existing database "
- "options failed. %s",
- status.ToString().c_str());
+ rdb_log_status_error(
+ status, "Compatibility check against existing database options failed");
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -3507,11 +4108,10 @@ static int rocksdb_init_func(void *const p) {
main_opts, tx_db_options, rocksdb_datadir, cf_descr, &cf_handles, &rdb);
if (!status.ok()) {
- std::string err_text = status.ToString();
- sql_print_error("RocksDB: Error opening instance: %s", err_text.c_str());
+ rdb_log_status_error(status, "Error opening instance");
DBUG_RETURN(HA_EXIT_FAILURE);
}
- cf_manager.init(&rocksdb_cf_options_map, &cf_handles);
+ cf_manager.init(std::move(cf_options_map), &cf_handles);
if (dict_manager.init(rdb->GetBaseDB(), &cf_manager)) {
// NO_LINT_DEBUG
@@ -3546,9 +4146,7 @@ static int rocksdb_init_func(void *const p) {
status = rdb->EnableAutoCompaction(compaction_enabled_cf_handles);
if (!status.ok()) {
- const std::string err_text = status.ToString();
- // NO_LINT_DEBUG
- sql_print_error("RocksDB: Error enabling compaction: %s", err_text.c_str());
+ rdb_log_status_error(status, "Error enabling compaction");
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -3594,7 +4192,38 @@ static int rocksdb_init_func(void *const p) {
}
#endif
- sql_print_information("RocksDB instance opened");
+ err = my_error_register(rdb_get_error_messages, HA_ERR_ROCKSDB_FIRST,
+ HA_ERR_ROCKSDB_LAST);
+ if (err != 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Couldn't initialize error messages");
+ rdb_open_tables.m_hash.~Rdb_table_set();
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ // Creating an instance of HistogramImpl should only happen after RocksDB
+ // has been successfully initialized.
+ commit_latency_stats = new rocksdb::HistogramImpl();
+
+ // Construct a list of directories which will be monitored by I/O watchdog
+ // to make sure that we won't lose write access to them.
+ std::vector<std::string> directories;
+
+ // 1. Data directory.
+ directories.push_back(mysql_real_data_home);
+
+ // 2. Transaction logs.
+ if (myrocks::rocksdb_wal_dir && *myrocks::rocksdb_wal_dir) {
+ directories.push_back(myrocks::rocksdb_wal_dir);
+ }
+
+ io_watchdog = new Rdb_io_watchdog(directories);
+ io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs);
+
+ // NO_LINT_DEBUG
+ sql_print_information("MyRocks storage engine plugin has been successfully "
+ "initialized.");
+
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -3675,15 +4304,27 @@ static int rocksdb_done_func(void *const p) {
delete rdb;
rdb = nullptr;
+ delete commit_latency_stats;
+ commit_latency_stats = nullptr;
+
+ delete io_watchdog;
+ io_watchdog = nullptr;
+
// Disown the cache data since we're shutting down.
// This results in memory leaks but it improved the shutdown time.
// Don't disown when running under valgrind
#ifndef HAVE_purify
- if (rocksdb_tbl_options.block_cache) {
- rocksdb_tbl_options.block_cache->DisownData();
+ if (rocksdb_tbl_options->block_cache) {
+ rocksdb_tbl_options->block_cache->DisownData();
}
#endif /* HAVE_purify */
+ rocksdb_db_options = nullptr;
+ rocksdb_tbl_options = nullptr;
+ rocksdb_stats = nullptr;
+
+ my_error_unregister(HA_ERR_ROCKSDB_FIRST, HA_ERR_ROCKSDB_LAST);
+
DBUG_RETURN(error);
}
@@ -3697,6 +4338,15 @@ static inline void rocksdb_smart_seek(bool seek_backward,
}
}
+static inline void rocksdb_smart_next(bool seek_backward,
+ rocksdb::Iterator *const iter) {
+ if (seek_backward) {
+ iter->Prev();
+ } else {
+ iter->Next();
+ }
+}
+
/**
@brief
Example of simple lock controls. The "table_handler" it creates is a
@@ -3742,6 +4392,7 @@ Rdb_open_tables_map::get_table_handler(const char *const table_name) {
thr_lock_init(&table_handler->m_thr_lock);
#ifdef MARIAROCKS_NOT_YET
table_handler->m_io_perf_read.init();
+ table_handler->m_io_perf_write.init();
#endif
}
DBUG_ASSERT(table_handler->m_ref_count >= 0);
@@ -3824,7 +4475,12 @@ void ha_rocksdb::update_auto_incr_val() {
my_bitmap_map *const old_map =
dbug_tmp_use_all_columns(table, table->read_set);
- new_val = field->val_int() + 1;
+ new_val = field->val_int();
+ // don't increment if we would wrap around
+ if (new_val != std::numeric_limits<longlong>::max()) {
+ new_val++;
+ }
+
dbug_tmp_restore_column_map(table->read_set, old_map);
longlong auto_incr_val = m_tbl_def->m_auto_incr_val;
@@ -3850,11 +4506,12 @@ int ha_rocksdb::load_hidden_pk_value() {
Decode PK field from the key
*/
longlong hidden_pk_id = 0;
- if (read_hidden_pk_id_from_rowkey(&hidden_pk_id)) {
+ auto err = read_hidden_pk_id_from_rowkey(&hidden_pk_id);
+ if (err) {
if (is_new_snapshot) {
tx->release_snapshot();
}
- return HA_ERR_INTERNAL_ERROR;
+ return err;
}
hidden_pk_id++;
@@ -3895,12 +4552,13 @@ int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) {
// Get hidden primary key from old key slice
Rdb_string_reader reader(&rowkey_slice);
if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE)))
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
const int length= 8; /* was Field_longlong::PACK_LENGTH in FB MySQL tree */
const uchar *from = reinterpret_cast<const uchar *>(reader.read(length));
if (from == nullptr) {
- return HA_EXIT_FAILURE; /* Mem-comparable image doesn't have enough bytes */
+ /* Mem-comparable image doesn't have enough bytes */
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
*hidden_pk_id = rdb_netbuf_read_uint64(&from);
@@ -3961,6 +4619,11 @@ bool ha_rocksdb::same_table(const ha_rocksdb &other) const {
return m_tbl_def->base_tablename() == other.m_tbl_def->base_tablename();
}
+/**
+ @return
+ false OK
+ other Error inpacking the data
+*/
bool ha_rocksdb::init_with_fields() {
DBUG_ENTER_FUNC();
@@ -3976,6 +4639,86 @@ bool ha_rocksdb::init_with_fields() {
DBUG_RETURN(false); /* Ok */
}
+/*
+ If the key is a TTL key, we may need to filter it out.
+
+ The purpose of read filtering for tables with TTL is to ensure that
+ during a transaction a key which has expired already but not removed by
+ compaction yet is not returned to the user.
+
+ Without this the user might be hit with problems such as disappearing
+ rows within a transaction, etc, because the compaction filter ignores
+ snapshots when filtering keys.
+*/
+bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
+ const int64_t curr_ts) {
+ DBUG_ASSERT(kd.has_ttl());
+ DBUG_ASSERT(kd.m_ttl_rec_offset != UINT_MAX);
+
+ /*
+ Curr_ts can only be 0 if there are no snapshots open.
+ should_hide_ttl_rec can only be called when there is >=1 snapshots, unless
+ we are filtering on the write path (single INSERT/UPDATE) in which case
+ we are passed in the current time as curr_ts.
+
+ In the event curr_ts is 0, we always decide not to filter the record. We
+ also log a warning and increment a diagnostic counter.
+ */
+ if (curr_ts == 0) {
+ update_row_stats(ROWS_HIDDEN_NO_SNAPSHOT);
+ return false;
+ }
+
+ if (!rdb_is_ttl_read_filtering_enabled() || !rdb_is_ttl_enabled()) {
+ return false;
+ }
+
+ Rdb_string_reader reader(&ttl_rec_val);
+
+ /*
+ Find where the 8-byte ttl is for each record in this index.
+ */
+ uint64 ts;
+ if (!reader.read(kd.m_ttl_rec_offset) || reader.read_uint64(&ts)) {
+ /*
+ This condition should never be reached since all TTL records have an
+ 8 byte ttl field in front. Don't filter the record out, and log an error.
+ */
+ std::string buf;
+ buf = rdb_hexdump(ttl_rec_val.data(), ttl_rec_val.size(),
+ RDB_MAX_HEXDUMP_LEN);
+ const GL_INDEX_ID gl_index_id = kd.get_gl_index_id();
+ // NO_LINT_DEBUG
+ sql_print_error("Decoding ttl from PK value failed, "
+ "for index (%u,%u), val: %s",
+ gl_index_id.cf_id, gl_index_id.index_id, buf.c_str());
+ DBUG_ASSERT(0);
+ return false;
+ }
+
+ /* Hide record if it has expired before the current snapshot time. */
+ uint64 read_filter_ts = 0;
+#ifndef NDEBUG
+ read_filter_ts += rdb_dbug_set_ttl_read_filter_ts();
+#endif
+ return ts + kd.m_ttl_duration + read_filter_ts <=
+ static_cast<uint64>(curr_ts);
+}
+
+void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward) {
+ if (kd.has_ttl()) {
+ while (iter->Valid() &&
+ should_hide_ttl_rec(
+ kd, iter->value(),
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ rocksdb_smart_next(seek_backward, iter);
+ }
+ }
+}
+
/**
Convert record from table->record[0] form into a form that can be written
into rocksdb.
@@ -3985,14 +4728,81 @@ bool ha_rocksdb::init_with_fields() {
@param packed_rec OUT Data slice with record data.
*/
-void ha_rocksdb::convert_record_to_storage_format(
- const rocksdb::Slice &pk_packed_slice,
- Rdb_string_writer *const pk_unpack_info, rocksdb::Slice *const packed_rec) {
- DBUG_ASSERT_IMP(m_maybe_unpack_info, pk_unpack_info);
+int ha_rocksdb::convert_record_to_storage_format(
+ const struct update_row_info &row_info, rocksdb::Slice *const packed_rec) {
+ DBUG_ASSERT_IMP(m_maybe_unpack_info, row_info.new_pk_unpack_info);
+ DBUG_ASSERT(m_pk_descr != nullptr);
+
+ const rocksdb::Slice &pk_packed_slice = row_info.new_pk_slice;
+ Rdb_string_writer *const pk_unpack_info = row_info.new_pk_unpack_info;
+ bool has_ttl = m_pk_descr->has_ttl();
+ bool has_ttl_column = !m_pk_descr->m_ttl_column.empty();
+ bool ttl_in_pk = has_ttl_column && (row_info.ttl_pk_offset != UINT_MAX);
+
m_storage_record.length(0);
- /* All NULL bits are initially 0 */
- m_storage_record.fill(m_null_bytes_in_rec, 0);
+ if (has_ttl) {
+ /* If it's a TTL record, reserve space for 8 byte TTL value in front. */
+ m_storage_record.fill(ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_in_rec, 0);
+ m_ttl_bytes_updated = false;
+
+ /*
+ If the TTL is contained within the key, we use the offset to find the
+ TTL value and place it in the beginning of the value record.
+ */
+ if (ttl_in_pk) {
+ Rdb_string_reader reader(&pk_packed_slice);
+ const char *ts;
+ if (!reader.read(row_info.ttl_pk_offset) ||
+ !(ts = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) {
+ std::string buf;
+ buf = rdb_hexdump(pk_packed_slice.data(), pk_packed_slice.size(),
+ RDB_MAX_HEXDUMP_LEN);
+ const GL_INDEX_ID gl_index_id = m_pk_descr->get_gl_index_id();
+ // NO_LINT_DEBUG
+ sql_print_error("Decoding ttl from PK failed during insert, "
+ "for index (%u,%u), key: %s",
+ gl_index_id.cf_id, gl_index_id.index_id, buf.c_str());
+ return HA_EXIT_FAILURE;
+ }
+
+ char *const data = const_cast<char *>(m_storage_record.ptr());
+ memcpy(data, ts, ROCKSDB_SIZEOF_TTL_RECORD);
+#ifndef NDEBUG
+ // Adjust for test case if needed
+ rdb_netbuf_store_uint64(
+ reinterpret_cast<uchar *>(data),
+ rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(data)) +
+ rdb_dbug_set_ttl_rec_ts());
+#endif
+ // Also store in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ } else if (!has_ttl_column) {
+ /*
+ For implicitly generated TTL records we need to copy over the old
+ TTL value from the old record in the event of an update. It was stored
+ in m_ttl_bytes.
+
+ Otherwise, generate a timestamp using the current time.
+ */
+ if (!row_info.old_pk_slice.empty()) {
+ char *const data = const_cast<char *>(m_storage_record.ptr());
+ memcpy(data, m_ttl_bytes, sizeof(uint64));
+ } else {
+ uint64 ts = static_cast<uint64>(std::time(nullptr));
+#ifndef NDEBUG
+ ts += rdb_dbug_set_ttl_rec_ts();
+#endif
+ char *const data = const_cast<char *>(m_storage_record.ptr());
+ rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+ // Also store in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ }
+ } else {
+ /* All NULL bits are initially 0 */
+ m_storage_record.fill(m_null_bytes_in_rec, 0);
+ }
// If a primary key may have non-empty unpack_info for certain values,
// (m_maybe_unpack_info=TRUE), we write the unpack_info block. The block
@@ -4010,7 +4820,11 @@ void ha_rocksdb::convert_record_to_storage_format(
Field *const field = table->field[i];
if (m_encoder_arr[i].maybe_null()) {
- char *const data = (char *)m_storage_record.ptr();
+ char *data = const_cast<char *>(m_storage_record.ptr());
+ if (has_ttl) {
+ data += ROCKSDB_SIZEOF_TTL_RECORD;
+ }
+
if (field->is_null()) {
data[m_encoder_arr[i].m_null_offset] |= m_encoder_arr[i].m_null_mask;
/* Don't write anything for NULL values */
@@ -4047,6 +4861,33 @@ void ha_rocksdb::convert_record_to_storage_format(
/* Copy the field data */
const uint len = field->pack_length_in_rec();
m_storage_record.append(reinterpret_cast<char *>(field->ptr), len);
+
+ /*
+ Check if this is the TTL field within the table, if so store the TTL
+ in the front of the record as well here.
+ */
+ if (has_ttl && has_ttl_column &&
+ i == m_pk_descr->get_ttl_field_offset()) {
+ DBUG_ASSERT(len == ROCKSDB_SIZEOF_TTL_RECORD);
+ DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
+ DBUG_ASSERT(m_pk_descr->get_ttl_field_offset() != UINT_MAX);
+
+ char *const data = const_cast<char *>(m_storage_record.ptr());
+ uint64 ts = uint8korr(field->ptr);
+#ifndef NDEBUG
+ ts += rdb_dbug_set_ttl_rec_ts();
+#endif
+ rdb_netbuf_store_uint64(reinterpret_cast<uchar *>(data), ts);
+
+ // If this is an update and the timestamp has been updated, take note
+ // so we can avoid updating SKs unnecessarily.
+ if (!row_info.old_pk_slice.empty()) {
+ m_ttl_bytes_updated =
+ memcmp(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
+ // Store timestamp in m_ttl_bytes to propagate to update_sk
+ memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD);
+ }
}
}
@@ -4067,6 +4908,8 @@ void ha_rocksdb::convert_record_to_storage_format(
*packed_rec =
rocksdb::Slice(m_storage_record.ptr(), m_storage_record.length());
+
+ return HA_EXIT_SUCCESS;
}
/*
@@ -4097,7 +4940,9 @@ void ha_rocksdb::setup_read_decoders() {
continue;
}
+ // The read_set bitmap is cleared on index merge, but columns still need to be decoded
if (m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums ||
+ bitmap_is_clear_all(table->read_set) ||
bitmap_is_set(table->read_set, table->field[i]->field_index)) {
// We will need to decode this field
m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size});
@@ -4183,7 +5028,7 @@ int ha_rocksdb::convert_blob_from_storage_format(
const char *data_len_str;
if (!(data_len_str = reader->read(length_bytes))) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
memcpy(blob->ptr, data_len_str, length_bytes);
@@ -4192,7 +5037,7 @@ int ha_rocksdb::convert_blob_from_storage_format(
reinterpret_cast<const uchar*>(data_len_str), length_bytes);
const char *blob_ptr;
if (!(blob_ptr = reader->read(data_len))) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
if (decode) {
@@ -4212,7 +5057,7 @@ int ha_rocksdb::convert_varchar_from_storage_format(
{
const char *data_len_str;
if (!(data_len_str = reader->read(field_var->length_bytes)))
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
uint data_len;
/* field_var->length_bytes is 1 or 2 */
@@ -4225,11 +5070,11 @@ int ha_rocksdb::convert_varchar_from_storage_format(
if (data_len > field_var->field_length) {
/* The data on disk is longer than table DDL allows? */
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
if (!reader->read(data_len)) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
if (decode) {
@@ -4248,7 +5093,7 @@ int ha_rocksdb::convert_field_from_storage_format(
const char *data_bytes;
if (len > 0) {
if ((data_bytes = reader->read(len)) == nullptr) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
if (decode)
@@ -4302,33 +5147,44 @@ int ha_rocksdb::convert_record_from_storage_format(
uint16 unpack_info_len = 0;
rocksdb::Slice unpack_slice;
+ /* If it's a TTL record, skip the 8 byte TTL value */
+ const char *ttl_bytes;
+ if (m_pk_descr->has_ttl()) {
+ if ((ttl_bytes = reader.read(ROCKSDB_SIZEOF_TTL_RECORD))) {
+ memcpy(m_ttl_bytes, ttl_bytes, ROCKSDB_SIZEOF_TTL_RECORD);
+ } else {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
/* Other fields are decoded from the value */
const char *null_bytes = nullptr;
if (m_null_bytes_in_rec && !(null_bytes = reader.read(m_null_bytes_in_rec))) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
if (m_maybe_unpack_info) {
- unpack_info = reader.read(RDB_UNPACK_HEADER_SIZE);
-
- if (!unpack_info || unpack_info[0] != RDB_UNPACK_DATA_TAG) {
- return HA_ERR_INTERNAL_ERROR;
+ unpack_info = reader.get_current_ptr();
+ if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) ||
+ !reader.read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
unpack_info_len =
rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(unpack_info + 1));
unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len);
- reader.read(unpack_info_len - RDB_UNPACK_HEADER_SIZE);
+ reader.read(unpack_info_len -
+ Rdb_key_def::get_unpack_header_size(unpack_info[0]));
}
- if (m_pk_descr->unpack_record(table, buf, &rowkey_slice,
- unpack_info ? &unpack_slice : nullptr,
- false /* verify_checksum */)) {
- return HA_ERR_INTERNAL_ERROR;
+ int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice,
+ unpack_info ? &unpack_slice : nullptr,
+ false /* verify_checksum */);
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
}
- int err = HA_EXIT_SUCCESS;
for (auto it = m_decoders_vect.begin(); it != m_decoders_vect.end(); it++) {
const Rdb_field_encoder *const field_dec = it->m_field_enc;
const bool decode = it->m_decode;
@@ -4339,8 +5195,9 @@ int ha_rocksdb::convert_record_from_storage_format(
Field *const field = table->field[field_dec->m_field_index];
/* Skip the bytes we need to skip */
- if (it->m_skip && !reader.read(it->m_skip))
- return HA_ERR_INTERNAL_ERROR;
+ if (it->m_skip && !reader.read(it->m_skip)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
uint field_offset = field->ptr - table->record[0];
uint null_offset = field->null_offset();
@@ -4407,7 +5264,7 @@ int ha_rocksdb::convert_record_from_storage_format(
if (stored_key_chksum != computed_key_chksum) {
m_pk_descr->report_checksum_mismatch(true, key->data(), key->size());
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
}
DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum2",
@@ -4415,13 +5272,13 @@ int ha_rocksdb::convert_record_from_storage_format(
if (stored_val_chksum != computed_val_chksum) {
m_pk_descr->report_checksum_mismatch(false, value->data(),
value->size());
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
}
m_row_checksums_checked++;
}
if (reader.remaining_bytes())
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
return HA_EXIT_SUCCESS;
@@ -4474,8 +5331,7 @@ void ha_rocksdb::setup_field_converters() {
If hidden pk exists, we skip this check since the field will never be
part of the hidden pk.
*/
- if (!has_hidden_pk(table) &&
- field->part_of_key.is_set(table->s->primary_key)) {
+ if (!has_hidden_pk(table)) {
KEY *const pk_info = &table->key_info[table->s->primary_key];
for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) {
/* key_part->fieldnr is counted from 1 */
@@ -4537,16 +5393,10 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
m_pk_descr->setup(table_arg, tbl_def_arg);
m_pk_tuple = reinterpret_cast<uchar *>(my_malloc(key_len, MYF(0)));
- if (m_pk_tuple == nullptr) {
- goto error;
- }
pack_key_len = m_pk_descr->max_storage_fmt_length();
m_pk_packed_tuple =
reinterpret_cast<uchar *>(my_malloc(pack_key_len, MYF(0)));
- if (m_pk_packed_tuple == nullptr) {
- goto error;
- }
/* Sometimes, we may use m_sk_packed_tuple for storing packed PK */
max_packed_sk_len = pack_key_len;
@@ -4563,40 +5413,40 @@ int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg,
}
}
- if (!(m_sk_packed_tuple =
- reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) ||
- !(m_sk_match_prefix_buf =
- reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) ||
- !(m_sk_packed_tuple_old =
- reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) ||
- !(m_end_key_packed_tuple =
- reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) ||
- !((m_pack_buffer = reinterpret_cast<uchar *>(
- my_malloc(max_packed_sk_len, MYF(0)))))) {
- goto error;
- }
+ m_sk_packed_tuple =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_sk_match_prefix_buf =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_sk_packed_tuple_old =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_end_key_packed_tuple =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_pack_buffer =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
/*
If inplace alter is happening, allocate special buffers for unique
secondary index duplicate checking.
*/
- if (alloc_alter_buffers &&
- (!(m_dup_sk_packed_tuple =
- reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)))) ||
- !(m_dup_sk_packed_tuple_old = reinterpret_cast<uchar *>(
- my_malloc(max_packed_sk_len, MYF(0)))))) {
- goto error;
+ if (alloc_alter_buffers) {
+ m_dup_sk_packed_tuple =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ m_dup_sk_packed_tuple_old =
+ reinterpret_cast<uchar *>(my_malloc(max_packed_sk_len, MYF(0)));
+ }
+
+ if (m_pk_tuple == nullptr || m_pk_packed_tuple == nullptr ||
+ m_sk_packed_tuple == nullptr || m_sk_packed_tuple_old == nullptr ||
+ m_end_key_packed_tuple == nullptr || m_pack_buffer == nullptr ||
+ (alloc_alter_buffers && (m_dup_sk_packed_tuple == nullptr ||
+ m_dup_sk_packed_tuple_old == nullptr))) {
+ // One or more of the above allocations failed. Clean up and exit
+ free_key_buffers();
+
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
DBUG_RETURN(HA_EXIT_SUCCESS);
-
-error:
- // If we're here then this means that at some point above an allocation may
- // have failed. To avoid any resource leaks and maintain a clear contract
- // we'll clean up before returning the error code.
- free_key_buffers();
-
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
}
void ha_rocksdb::free_key_buffers() {
@@ -4662,27 +5512,37 @@ void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) {
m_skip_unique_check = regex_handler.matches(m_tbl_def->base_tablename());
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
DBUG_ENTER_FUNC();
- if (close()) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ int err = close();
+ if (err) {
+ DBUG_RETURN(err);
}
m_table_handler = rdb_open_tables.get_table_handler(name);
if (m_table_handler == nullptr) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock,
nullptr);
m_io_perf.init(&m_table_handler->m_table_perf_context,
- &m_table_handler->m_io_perf_read, &stats);
+ &m_table_handler->m_io_perf_read,
+ &m_table_handler->m_io_perf_write, &stats);
+ Rdb_perf_context_guard guard(&m_io_perf,
+ rocksdb_perf_context_level(ha_thd()));
std::string fullname;
- if (rdb_normalize_tablename(name, &fullname)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ err = rdb_normalize_tablename(name, &fullname);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
}
m_tbl_def = ddl_manager.find(fullname);
@@ -4690,7 +5550,7 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
my_error(ER_INTERNAL_ERROR, MYF(0),
"Attempt to open a table that is not present in RocksDB-SE data "
"dictionary");
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ DBUG_RETURN(HA_ERR_ROCKSDB_INVALID_TABLE);
}
m_lock_rows = RDB_LOCK_NONE;
@@ -4704,7 +5564,7 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
key_used_on_scan = table->s->primary_key;
// close() above has already called free_key_buffers(). No need to do it here.
- int err = alloc_key_buffers(table, m_tbl_def);
+ err = alloc_key_buffers(table, m_tbl_def);
if (err) {
DBUG_RETURN(err);
@@ -4740,13 +5600,14 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) {
if (table->found_next_number_field)
load_auto_incr_value();
- if (has_hidden_pk(table) && load_hidden_pk_value()) {
+ if (has_hidden_pk(table) &&
+ (err = load_hidden_pk_value()) != HA_EXIT_SUCCESS) {
free_key_buffers();
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ DBUG_RETURN(err);
}
/* Index block size in MyRocks: used by MySQL in query optimization */
- stats.block_size = rocksdb_tbl_options.block_size;
+ stats.block_size = rocksdb_tbl_options->block_size;
#ifdef MARIAROCKS_NOT_YET // MDEV-10976
/* Determine at open whether we can use Read Free Replication or not */
@@ -4775,9 +5636,58 @@ int ha_rocksdb::close(void) {
m_table_handler = nullptr;
}
+ // These are needed to suppress valgrind errors in rocksdb.partition
+ m_storage_record.free();
+ m_last_rowkey.free();
+ m_sk_tails.free();
+ m_sk_tails_old.free();
+ m_pk_unpack_info.free();
+
DBUG_RETURN(HA_EXIT_SUCCESS);
}
+static const char *rdb_error_messages[] = {
+ "Table must have a PRIMARY KEY.",
+ "Specifying DATA DIRECTORY for an individual table is not supported.",
+ "Specifying INDEX DIRECTORY for an individual table is not supported.",
+ "RocksDB commit failed.",
+ "Failure during bulk load operation.",
+ "Found data corruption.",
+ "CRC checksum mismatch.",
+ "Invalid table.",
+ "Could not access RocksDB properties.",
+ "File I/O error during merge/sort operation.",
+ "RocksDB status: not found.",
+ "RocksDB status: corruption.",
+ "RocksDB status: invalid argument.",
+ "RocksDB status: io error.",
+ "RocksDB status: no space.",
+ "RocksDB status: merge in progress.",
+ "RocksDB status: incomplete.",
+ "RocksDB status: shutdown in progress.",
+ "RocksDB status: timed out.",
+ "RocksDB status: aborted.",
+ "RocksDB status: lock limit reached.",
+ "RocksDB status: busy.",
+ "RocksDB status: deadlock.",
+ "RocksDB status: expired.",
+ "RocksDB status: try again.",
+};
+
+static_assert((sizeof(rdb_error_messages) / sizeof(rdb_error_messages[0])) ==
+ ((HA_ERR_ROCKSDB_LAST - HA_ERR_ROCKSDB_FIRST) + 1),
+ "Number of error messages doesn't match number of error codes");
+
+//psergey-merge: do we need this in MariaDB: we have get_error_messages
+//below...
+#if 0
+static const char *rdb_get_error_message(int nr) {
+ return rdb_error_messages[nr - HA_ERR_ROCKSDB_FIRST];
+}
+#endif
+
+static const char **rdb_get_error_messages(int nr) { return rdb_error_messages; }
+
bool ha_rocksdb::get_error_message(const int error, String *const buf) {
DBUG_ENTER_FUNC();
@@ -4786,44 +5696,97 @@ bool ha_rocksdb::get_error_message(const int error, String *const buf) {
static_assert(HA_ERR_ROCKSDB_LAST > HA_ERR_LAST,
"HA_ERR_ROCKSDB_LAST > HA_ERR_LAST");
- DBUG_ASSERT(error > 0);
- DBUG_ASSERT(error <= HA_ERR_ROCKSDB_LAST);
DBUG_ASSERT(buf != nullptr);
- Rdb_transaction *const tx = get_tx_from_thd(ha_thd());
- bool temp_error = false;
+ if (error == HA_ERR_LOCK_WAIT_TIMEOUT || error == HA_ERR_LOCK_DEADLOCK) {
+ Rdb_transaction *const tx = get_tx_from_thd(ha_thd());
+ DBUG_ASSERT(tx != nullptr);
+ buf->append(tx->m_detailed_error);
+ DBUG_RETURN(true);
+ }
+
+ if (error >= HA_ERR_ROCKSDB_FIRST && error <= HA_ERR_ROCKSDB_LAST) {
+ buf->append(rdb_error_messages[error - HA_ERR_ROCKSDB_FIRST]);
+ }
+
+ // We can be called with the values which are < HA_ERR_FIRST because most
+ // MySQL internal functions will just return HA_EXIT_FAILURE in case of
+ // an error.
- switch (error) {
- case HA_ERR_ROCKSDB_PK_REQUIRED:
- buf->append("Table must have a PRIMARY KEY.");
+ DBUG_RETURN(false);
+}
+
+/*
+ Generalized way to convert RocksDB status errors into MySQL error code, and
+ print error message.
+
+ Each error code below maps to a RocksDB status code found in:
+ rocksdb/include/rocksdb/status.h
+*/
+int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s,
+ const char *opt_msg) {
+ DBUG_ASSERT(!s.ok());
+
+ int err;
+ switch (s.code()) {
+ case rocksdb::Status::Code::kOk:
+ err = HA_EXIT_SUCCESS;
break;
- case HA_ERR_ROCKSDB_UNIQUE_NOT_SUPPORTED:
- buf->append("Unique indexes are not supported.");
+ case rocksdb::Status::Code::kNotFound:
+ err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND;
break;
- case HA_ERR_ROCKSDB_TOO_MANY_LOCKS:
- buf->append("Number of locks held reached @@rocksdb_max_row_locks.");
+ case rocksdb::Status::Code::kCorruption:
+ err = HA_ERR_ROCKSDB_STATUS_CORRUPTION;
break;
- case HA_ERR_LOCK_WAIT_TIMEOUT:
- DBUG_ASSERT(tx != nullptr);
- buf->append(tx->m_detailed_error);
- temp_error = true;
+ case rocksdb::Status::Code::kNotSupported:
+ err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED;
break;
- case HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED:
- buf->append("Specifying DATA DIRECTORY for an individual table is not "
- "supported.");
+ case rocksdb::Status::Code::kInvalidArgument:
+ err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT;
break;
- case HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED:
- buf->append("Specifying INDEX DIRECTORY for an individual table is not "
- "supported.");
+ case rocksdb::Status::Code::kIOError:
+ err = (s.IsNoSpace()) ? HA_ERR_ROCKSDB_STATUS_NO_SPACE
+ : HA_ERR_ROCKSDB_STATUS_IO_ERROR;
break;
- default:
- // We can be called with the values which are < HA_ERR_FIRST because most
- // MySQL internal functions will just return HA_EXIT_FAILURE in case of
- // an error.
+ case rocksdb::Status::Code::kMergeInProgress:
+ err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS;
break;
+ case rocksdb::Status::Code::kIncomplete:
+ err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE;
+ break;
+ case rocksdb::Status::Code::kShutdownInProgress:
+ err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS;
+ break;
+ case rocksdb::Status::Code::kTimedOut:
+ err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT;
+ break;
+ case rocksdb::Status::Code::kAborted:
+ err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT
+ : HA_ERR_ROCKSDB_STATUS_ABORTED;
+ break;
+ case rocksdb::Status::Code::kBusy:
+ err = (s.IsDeadlock()) ? HA_ERR_ROCKSDB_STATUS_DEADLOCK
+ : HA_ERR_ROCKSDB_STATUS_BUSY;
+ break;
+ case rocksdb::Status::Code::kExpired:
+ err = HA_ERR_ROCKSDB_STATUS_EXPIRED;
+ break;
+ case rocksdb::Status::Code::kTryAgain:
+ err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN;
+ break;
+ default:
+ DBUG_ASSERT(0);
+ return -1;
+ }
+
+ if (opt_msg) {
+ my_error(ER_RDB_STATUS_MSG, MYF(0), opt_msg, s.code(),
+ s.ToString().c_str());
+ } else {
+ my_error(ER_RDB_STATUS_GENERAL, MYF(0), s.code(), s.ToString().c_str());
}
- DBUG_RETURN(temp_error);
+ return err;
}
/* MyRocks supports only the following collations for indexed columns */
@@ -4973,12 +5936,10 @@ int ha_rocksdb::create_cfs(
}
collation_err += get_charset_name(coll);
}
- my_printf_error(
- ER_UNKNOWN_ERROR, "Unsupported collation on string indexed "
- "column %s.%s Use binary collation (%s).",
- MYF(0), tbl_def_arg->full_tablename().c_str(),
- table_arg->key_info[i].key_part[part].field->field_name,
- collation_err.c_str());
+ my_error(ER_UNSUPPORTED_COLLATION, MYF(0),
+ tbl_def_arg->full_tablename().c_str(),
+ table_arg->key_info[i].key_part[part].field->field_name,
+ collation_err.c_str());
DBUG_RETURN(HA_EXIT_FAILURE);
}
}
@@ -4994,32 +5955,17 @@ int ha_rocksdb::create_cfs(
std::string cf_name = generate_cf_name(i, table_arg, tbl_def_arg,
&per_part_match_found);
- const char *const key_name = get_key_name(i, table_arg, tbl_def_arg);
-
- if (looks_like_per_index_cf_typo(cf_name.c_str())) {
- my_error(ER_NOT_SUPPORTED_YET, MYF(0),
- "column family name looks like a typo of $per_index_cf.");
- DBUG_RETURN(HA_EXIT_FAILURE);
- }
-
// Prevent create from using the system column family.
- if (!cf_name.empty() && strcmp(DEFAULT_SYSTEM_CF_NAME,
- cf_name.c_str()) == 0) {
+ if (cf_name == DEFAULT_SYSTEM_CF_NAME) {
my_error(ER_WRONG_ARGUMENTS, MYF(0),
"column family not valid for storing index data.");
DBUG_RETURN(HA_EXIT_FAILURE);
}
- bool is_auto_cf_flag;
-
// Here's how `get_or_create_cf` will use the input parameters:
//
// `cf_name` - will be used as a CF name.
- // `key_name` - will be only used in case of "$per_index_cf".
- cf_handle =
- cf_manager.get_or_create_cf(rdb, cf_name.c_str(),
- tbl_def_arg->full_tablename(), key_name,
- &is_auto_cf_flag);
+ cf_handle = cf_manager.get_or_create_cf(rdb, cf_name);
if (!cf_handle) {
DBUG_RETURN(HA_EXIT_FAILURE);
@@ -5029,7 +5975,6 @@ int ha_rocksdb::create_cfs(
cf.cf_handle = cf_handle;
cf.is_reverse_cf = Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str());
- cf.is_auto_cf = is_auto_cf_flag;
cf.is_per_partition_cf = per_part_match_found;
}
@@ -5071,6 +6016,7 @@ int ha_rocksdb::create_inplace_key_defs(
uint i;
for (i = 0; i < tbl_def_arg->m_key_count; i++) {
const auto &it = old_key_pos.find(get_key_name(i, table_arg, tbl_def_arg));
+
if (it != old_key_pos.end()) {
/*
Found matching index in old table definition, so copy it over to the
@@ -5078,12 +6024,9 @@ int ha_rocksdb::create_inplace_key_defs(
*/
const Rdb_key_def &okd = *old_key_descr[it->second];
- uint16 index_dict_version = 0;
- uchar index_type = 0;
- uint16 kv_version = 0;
const GL_INDEX_ID gl_index_id = okd.get_gl_index_id();
- if (!dict_manager.get_index_info(gl_index_id, &index_dict_version,
- &index_type, &kv_version)) {
+ struct Rdb_index_info index_info;
+ if (!dict_manager.get_index_info(gl_index_id, &index_info)) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Could not get index information "
"for Index Number (%u,%u), table %s",
@@ -5092,16 +6035,25 @@ int ha_rocksdb::create_inplace_key_defs(
DBUG_RETURN(HA_EXIT_FAILURE);
}
+ uint32 ttl_rec_offset =
+ Rdb_key_def::has_index_flag(index_info.m_index_flags,
+ Rdb_key_def::TTL_FLAG)
+ ? Rdb_key_def::calculate_index_flag_offset(
+ index_info.m_index_flags, Rdb_key_def::TTL_FLAG)
+ : UINT_MAX;
+
/*
We can't use the copy constructor because we need to update the
keynr within the pack_info for each field and the keyno of the keydef
itself.
*/
new_key_descr[i] = std::make_shared<Rdb_key_def>(
- okd.get_index_number(), i, okd.get_cf(), index_dict_version,
- index_type, kv_version, okd.m_is_reverse_cf, okd.m_is_auto_cf,
+ okd.get_index_number(), i, okd.get_cf(),
+ index_info.m_index_dict_version, index_info.m_index_type,
+ index_info.m_kv_version, okd.m_is_reverse_cf,
okd.m_is_per_partition_cf, okd.m_name.c_str(),
- dict_manager.get_stats(gl_index_id));
+ dict_manager.get_stats(gl_index_id), index_info.m_index_flags,
+ ttl_rec_offset, index_info.m_ttl_duration);
} else if (create_key_def(table_arg, i, tbl_def_arg, &new_key_descr[i],
cfs[i])) {
DBUG_RETURN(HA_EXIT_FAILURE);
@@ -5158,14 +6110,66 @@ std::unordered_map<std::string, uint> ha_rocksdb::get_old_key_positions(
KEY *const new_key = &table_arg->key_info[it->second];
- if (!compare_key_parts(old_key, new_key)) {
- old_key_pos[old_key->name] = i;
+ /*
+ Check that the key is identical between old and new tables.
+ If not, we still need to create a new index.
+
+ The exception is if there is an index changed from unique to non-unique,
+ in these cases we don't need to rebuild as they are stored the same way in
+ RocksDB.
+ */
+ bool unique_to_non_unique =
+ ((old_key->flags ^ new_key->flags) == HA_NOSAME) &&
+ (old_key->flags & HA_NOSAME);
+
+ if (compare_keys(old_key, new_key) && !unique_to_non_unique) {
+ continue;
+ }
+
+ /* Check to make sure key parts match. */
+ if (compare_key_parts(old_key, new_key)) {
+ continue;
}
+
+ old_key_pos[old_key->name] = i;
}
DBUG_RETURN(old_key_pos);
}
+/* Check to see if two keys are identical. */
+int ha_rocksdb::compare_keys(const KEY *const old_key,
+ const KEY *const new_key) const {
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT(old_key != nullptr);
+ DBUG_ASSERT(new_key != nullptr);
+
+ /* Check index name. */
+ if (strcmp(old_key->name, new_key->name) != 0) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* If index algorithms are different then keys are different. */
+ if (old_key->algorithm != new_key->algorithm) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* Check that the key is identical between old and new tables. */
+ if ((old_key->flags ^ new_key->flags) & HA_KEYFLAG_MASK) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /* Check index comment. (for column family changes) */
+ std::string old_comment(old_key->comment.str, old_key->comment.length);
+ std::string new_comment(new_key->comment.str, new_key->comment.length);
+ if (old_comment.compare(new_comment) != 0) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
/* Check two keys to ensure that key parts within keys match */
int ha_rocksdb::compare_key_parts(const KEY *const old_key,
const KEY *const new_key) const {
@@ -5185,6 +6189,11 @@ int ha_rocksdb::compare_key_parts(const KEY *const old_key,
new_key->key_part[i].field->field_name) != 0) {
DBUG_RETURN(HA_EXIT_FAILURE);
}
+
+ /* Check if prefix index key part length has changed */
+ if (old_key->key_part[i].length != new_key->key_part[i].length) {
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
}
DBUG_RETURN(HA_EXIT_SUCCESS);
@@ -5216,6 +6225,36 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
DBUG_ASSERT(new_key_def != nullptr);
DBUG_ASSERT(*new_key_def == nullptr);
+ uint64 ttl_duration = 0;
+ std::string ttl_column;
+ uint ttl_field_offset;
+
+ uint err;
+ if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg,
+ &ttl_duration))) {
+ DBUG_RETURN(err);
+ }
+
+ if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column,
+ &ttl_field_offset))) {
+ DBUG_RETURN(err);
+ }
+
+ /* We don't currently support TTL on tables with hidden primary keys. */
+ if (ttl_duration > 0 && is_hidden_pk(i, table_arg, tbl_def_arg)) {
+ my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0));
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
+ /*
+ If TTL duration is not specified but TTL column was specified, throw an
+ error because TTL column requires duration.
+ */
+ if (ttl_duration == 0 && !ttl_column.empty()) {
+ my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str());
+ DBUG_RETURN(HA_EXIT_FAILURE);
+ }
+
const uint index_id = ddl_manager.get_and_update_next_number(&dict_manager);
const uint16_t index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST;
uchar index_type;
@@ -5234,11 +6273,36 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i,
kv_version = sk_latest_version;
}
+ // Use PRIMARY_FORMAT_VERSION_UPDATE1 here since it is the same value as
+ // SECONDARY_FORMAT_VERSION_UPDATE1 so it doesn't matter if this is a
+ // primary key or secondary key.
+ DBUG_EXECUTE_IF("MYROCKS_LEGACY_VARBINARY_FORMAT", {
+ kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_UPDATE1;
+ });
+
+ DBUG_EXECUTE_IF("MYROCKS_NO_COVERED_BITMAP_FORMAT", {
+ if (index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ kv_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_UPDATE2;
+ }
+ });
+
+ uint32 index_flags = (ttl_duration > 0 ? Rdb_key_def::TTL_FLAG : 0);
+
+ uint32 ttl_rec_offset =
+ Rdb_key_def::has_index_flag(index_flags, Rdb_key_def::TTL_FLAG)
+ ? Rdb_key_def::calculate_index_flag_offset(index_flags,
+ Rdb_key_def::TTL_FLAG)
+ : UINT_MAX;
+
const char *const key_name = get_key_name(i, table_arg, m_tbl_def);
*new_key_def = std::make_shared<Rdb_key_def>(
index_id, i, cf_info.cf_handle, index_dict_version, index_type,
- kv_version, cf_info.is_reverse_cf, cf_info.is_auto_cf,
- cf_info.is_per_partition_cf, key_name);
+ kv_version, cf_info.is_reverse_cf, cf_info.is_per_partition_cf, key_name,
+ Rdb_index_stats(), index_flags, ttl_rec_offset, ttl_duration);
+
+ if (!ttl_column.empty()) {
+ (*new_key_def)->m_ttl_column = ttl_column;
+ }
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -5249,13 +6313,13 @@ int rdb_normalize_tablename(const std::string &tablename,
if (tablename.size() < 2 || tablename[0] != '.' || tablename[1] != FN_LIBCHAR) {
DBUG_ASSERT(0); // We were not passed table name?
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
}
size_t pos = tablename.find_first_of(FN_LIBCHAR, 2);
if (pos == std::string::npos) {
DBUG_ASSERT(0); // We were not passed table name?
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
}
*strbuf = tablename.substr(2, pos - 2) + "." + tablename.substr(pos + 1);
@@ -5336,7 +6400,7 @@ int rdb_split_normalized_tablename(const std::string &fullname,
/* Invalid table name? */
if (dotpos == std::string::npos) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
}
// Table must have a database name associated with it.
@@ -5383,6 +6447,10 @@ int rdb_split_normalized_tablename(const std::string &fullname,
Called from handle.cc by ha_create_table().
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+
@see
ha_create_table() in handle.cc
*/
@@ -5411,27 +6479,24 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
DBUG_RETURN(HA_WRONG_CREATE_OPTION);
}
- int res;
std::string str;
+ int err;
THD *const thd = my_core::thd_get_current_thd();
if (get_table_if_exists(name)) {
if (thd->lex->sql_command == SQLCOM_TRUNCATE) {
- if (delete_table(name)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ err = delete_table(name);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
}
} else {
- if (rdb_normalize_tablename(name, &str)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ err = rdb_normalize_tablename(name, &str);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
}
- my_printf_error(
- ER_UNKNOWN_ERROR,
- "Table '%s' does not exist, but metadata information exists inside "
- "MyRocks. This is a sign of data inconsistency. Please check if "
- "'%s.frm' exists, and try to restore it if it does not exist.",
- MYF(0), str.c_str(), name);
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name);
+ DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA);
}
}
@@ -5440,32 +6505,17 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
passes strings like "./test/t14#P#p0" for individual partitions,
while table_arg->s->table_name has none of that.
*/
- if (rdb_normalize_tablename(name, &str)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ err = rdb_normalize_tablename(name, &str);
+ if (err != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(err);
}
if (contains_foreign_key(thd)) {
- my_printf_error(ER_NOT_SUPPORTED_YET, "MyRocks does not currently support "
- "foreign key constraints",
- MYF(0));
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
-
- /*
- TODO(alexyang): Temporarily disable unique indexes support when there is no
- Primary Key
- */
- for (uint keyno = 0; keyno < table_arg->s->keys; keyno++) {
- if ((table_arg->key_info[keyno].flags & HA_NOSAME) &&
- has_hidden_pk(table_arg)) {
- my_printf_error(ER_UNKNOWN_ERROR, "Unique index support is disabled when "
- "the table has no primary key. ",
- MYF(0));
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
+ my_error(ER_NOT_SUPPORTED_YET, MYF(0),
+ "FOREIGN KEY for the RocksDB storage engine");
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
}
- bool write_err = false;
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
@@ -5486,21 +6536,28 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg,
m_tbl_def->m_key_count = n_keys;
m_tbl_def->m_key_descr_arr = m_key_descr_arr;
- if ((res = create_key_defs(table_arg, m_tbl_def))) {
+ err = create_key_defs(table_arg, m_tbl_def);
+ if (err != HA_EXIT_SUCCESS) {
goto error;
}
m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)];
dict_manager.lock();
- write_err =
- ddl_manager.put_and_write(m_tbl_def, batch) || dict_manager.commit(batch);
- dict_manager.unlock();
+ err = ddl_manager.put_and_write(m_tbl_def, batch);
+ if (err != HA_EXIT_SUCCESS) {
+ dict_manager.unlock();
+ goto error;
+ }
- if (write_err) {
+ err = dict_manager.commit(batch);
+ if (err != HA_EXIT_SUCCESS) {
+ dict_manager.unlock();
goto error;
}
+ dict_manager.unlock();
+
if (create_info->auto_increment_value)
m_tbl_def->m_auto_incr_val = create_info->auto_increment_value;
@@ -5526,7 +6583,7 @@ error:
m_tbl_def = nullptr;
m_key_descr_arr = nullptr;
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ DBUG_RETURN(err);
}
/**
@@ -5573,7 +6630,8 @@ bool ha_rocksdb::check_keyread_allowed(uint inx, uint part,
int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
rocksdb::Iterator *const iter,
const bool &full_key_match,
- const rocksdb::Slice &key_slice) const {
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts) {
DBUG_ASSERT(iter != nullptr);
/*
@@ -5582,38 +6640,63 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
lookup_tuple may be a prefix of the index.
*/
rocksdb_smart_seek(kd.m_is_reverse_cf, iter, key_slice);
- if (!iter->Valid() || !kd.value_matches_prefix(iter->key(), key_slice)) {
+
+ while (iter->Valid() && kd.value_matches_prefix(iter->key(), key_slice)) {
/*
- Got a record that is not equal to the lookup value, or even a record
- from another table.index.
+ If TTL is enabled we need to check if the given key has already expired
+ from the POV of the current transaction. If it has, try going to the next
+ key.
*/
- return HA_ERR_KEY_NOT_FOUND;
+ if (kd.has_ttl() && should_hide_ttl_rec(kd, iter->value(), ttl_filter_ts)) {
+ rocksdb_smart_next(kd.m_is_reverse_cf, iter);
+ continue;
+ }
+
+ return HA_EXIT_SUCCESS;
}
- return HA_EXIT_SUCCESS;
+
+ /*
+ Got a record that is not equal to the lookup value, or even a record
+ from another table.index.
+ */
+ return HA_ERR_KEY_NOT_FOUND;
}
int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
const bool &full_key_match,
- const rocksdb::Slice &key_slice) {
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts) {
/*
We are looking for record with the biggest t.key such that
t.key < lookup_tuple.
*/
rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice);
- if (m_scan_it->Valid() && full_key_match &&
- kd.value_matches_prefix(m_scan_it->key(), key_slice)) {
- /* We are using full key and we've hit an exact match */
- if (kd.m_is_reverse_cf) {
- m_scan_it->Next();
- } else {
- m_scan_it->Prev();
+
+ while (m_scan_it->Valid()) {
+ /*
+ We are using full key and we've hit an exact match, or...
+
+ If TTL is enabled we need to check if the given key has already expired
+ from the POV of the current transaction. If it has, try going to the next
+ key.
+ */
+ if ((full_key_match &&
+ kd.value_matches_prefix(m_scan_it->key(), key_slice)) ||
+ (kd.has_ttl() &&
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts))) {
+ rocksdb_smart_next(!kd.m_is_reverse_cf, m_scan_it);
+ continue;
}
+
+ return HA_EXIT_SUCCESS;
}
- return m_scan_it->Valid() ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
+
+ return HA_ERR_KEY_NOT_FOUND;
}
int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
- const rocksdb::Slice &key_slice) {
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts) {
/*
We are looking for the first record such that
@@ -5623,27 +6706,37 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
with HA_READ_KEY_OR_NEXT, $GT = '>='
*/
rocksdb_smart_seek(kd.m_is_reverse_cf, m_scan_it, key_slice);
+
+ /*
+ If TTL is enabled we need to check if the given key has already expired
+ from the POV of the current transaction. If it has, try going to the next
+ key.
+ */
+ while (m_scan_it->Valid() && kd.has_ttl() &&
+ should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) {
+ rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it);
+ }
+
return m_scan_it->Valid() ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
}
-int ha_rocksdb::position_to_correct_key(const Rdb_key_def &kd,
- const enum ha_rkey_function &find_flag,
- const bool &full_key_match,
- const uchar *const key,
- const key_part_map &keypart_map,
- const rocksdb::Slice &key_slice,
- bool *const move_forward) {
+int ha_rocksdb::position_to_correct_key(
+ const Rdb_key_def &kd, const enum ha_rkey_function &find_flag,
+ const bool &full_key_match, const uchar *const key,
+ const key_part_map &keypart_map, const rocksdb::Slice &key_slice,
+ bool *const move_forward, const int64_t ttl_filter_ts) {
int rc = 0;
*move_forward = true;
switch (find_flag) {
case HA_READ_KEY_EXACT:
- rc = read_key_exact(kd, m_scan_it, full_key_match, key_slice);
+ rc =
+ read_key_exact(kd, m_scan_it, full_key_match, key_slice, ttl_filter_ts);
break;
case HA_READ_BEFORE_KEY:
*move_forward = false;
- rc = read_before_key(kd, full_key_match, key_slice);
+ rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
/* The record we've got is not from this index */
rc = HA_ERR_KEY_NOT_FOUND;
@@ -5651,7 +6744,7 @@ int ha_rocksdb::position_to_correct_key(const Rdb_key_def &kd,
break;
case HA_READ_AFTER_KEY:
case HA_READ_KEY_OR_NEXT:
- rc = read_after_key(kd, key_slice);
+ rc = read_after_key(kd, key_slice, ttl_filter_ts);
if (rc == 0 && !kd.covers_key(m_scan_it->key())) {
/* The record we've got is not from this index */
rc = HA_ERR_KEY_NOT_FOUND;
@@ -5674,7 +6767,7 @@ int ha_rocksdb::position_to_correct_key(const Rdb_key_def &kd,
records with prefix=lookup, we should return the last record
before that.
*/
- rc = read_before_key(kd, full_key_match, key_slice);
+ rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts);
if (rc == 0) {
const rocksdb::Slice &rkey = m_scan_it->key();
if (!kd.covers_key(rkey)) {
@@ -5778,16 +6871,21 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
int rc = 0;
uint pk_size;
- if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
- /* Get the key columns and primary key value */
- const rocksdb::Slice &rkey = m_scan_it->key();
+ /* Get the key columns and primary key value */
+ const rocksdb::Slice &rkey = m_scan_it->key();
+ const rocksdb::Slice &value = m_scan_it->value();
+
+ bool covered_lookup =
+ m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap);
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) {
pk_size =
kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
- const rocksdb::Slice &value = m_scan_it->value();
- if (pk_size == RDB_INVALID_KEY_LEN ||
- kd.unpack_record(table, buf, &rkey, &value,
- m_verify_row_debug_checksums)) {
- rc = HA_ERR_INTERNAL_ERROR;
+ if (pk_size == RDB_INVALID_KEY_LEN) {
+ rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
+ } else {
+ rc = kd.unpack_record(table, buf, &rkey, &value,
+ m_verify_row_debug_checksums);
+ global_stats.covered_secondary_key_lookups.inc();
}
} else {
if (kd.m_is_reverse_cf)
@@ -5799,7 +6897,7 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey,
m_pk_packed_tuple);
if (pk_size == RDB_INVALID_KEY_LEN) {
- rc = HA_ERR_INTERNAL_ERROR;
+ rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
} else {
rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
}
@@ -5891,19 +6989,21 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
const uint size = m_key_descr_arr[keyno]->get_primary_key_tuple(
table, *m_pk_descr, &key, m_pk_packed_tuple);
if (size == RDB_INVALID_KEY_LEN) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
m_last_rowkey.copy((const char *)m_pk_packed_tuple, size,
&my_charset_bin);
- if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE &&
+ rocksdb::Slice value = m_scan_it->value();
+ bool covered_lookup =
+ m_keyread_only || m_key_descr_arr[keyno]->covers_lookup(
+ table, &value, &m_lookup_bitmap);
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE &&
!has_hidden_pk(table)) {
- rocksdb::Slice value = m_scan_it->value();
- if (m_key_descr_arr[keyno]->unpack_record(
- table, buf, &key, &value, m_verify_row_debug_checksums)) {
- return HA_ERR_INTERNAL_ERROR;
- }
+ rc = m_key_descr_arr[keyno]->unpack_record(
+ table, buf, &key, &value, m_verify_row_debug_checksums);
+ global_stats.covered_secondary_key_lookups.inc();
} else {
rc = get_row_by_rowid(buf, m_pk_packed_tuple, size);
}
@@ -5935,6 +7035,10 @@ bool ha_rocksdb::is_using_full_key(key_part_map keypart_map,
The only difference from handler::read_range_first is that
ha_rocksdb::read_range_first passes end_key to
ha_rocksdb::index_read_map_impl function.
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
*/
int ha_rocksdb::read_range_first(const key_range *const start_key,
const key_range *const end_key,
@@ -6003,6 +7107,12 @@ int ha_rocksdb::prepare_range_scan(const key_range *start_key,
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+
int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
key_part_map keypart_map,
enum ha_rkey_function find_flag) {
@@ -6023,6 +7133,10 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
condition length is the same as rocksdb::Slice.size() of the start key.
On range scan, equal condition length is MIN(start_key, end_key) of the
rocksdb::Slice expression.
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
*/
int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
key_part_map keypart_map,
@@ -6053,8 +7167,10 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
const uint size = kd.pack_index_tuple(table, m_pack_buffer,
m_pk_packed_tuple, key, keypart_map);
bool skip_lookup = is_blind_delete_enabled();
+
rc = get_row_by_rowid(buf, m_pk_packed_tuple, size,
- skip_lookup);
+ skip_lookup, false);
+
if (!rc && !skip_lookup) {
#ifdef MARIAROCKS_NOT_YET
stats.rows_read++;
@@ -6133,16 +7249,16 @@ int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key,
This will open the iterator and position it at a record that's equal or
greater than the lookup tuple.
*/
- setup_scan_iterator(kd, &slice, use_all_keys, is_ascending(kd, find_flag),
- eq_cond_len);
+ setup_scan_iterator(kd, &slice, use_all_keys, eq_cond_len);
/*
Once we are positioned on from above, move to the position we really
want: See storage/rocksdb/rocksdb-range-access.txt
*/
bool move_forward;
- rc = position_to_correct_key(kd, find_flag, using_full_key, key,
- keypart_map, slice, &move_forward);
+ rc =
+ position_to_correct_key(kd, find_flag, using_full_key, key, keypart_map,
+ slice, &move_forward, tx->m_snapshot_timestamp);
if (rc) {
/* This status is returned on any error */
@@ -6213,6 +7329,8 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
const Rdb_key_def &kd = *m_key_descr_arr[active_index];
while (1) {
+ rocksdb_skip_expired_records(kd, m_scan_it, !move_forward);
+
if (!m_scan_it->Valid()) {
table->status = STATUS_NOT_FOUND;
return HA_ERR_END_OF_FILE;
@@ -6234,17 +7352,15 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
}
const rocksdb::Slice value = m_scan_it->value();
- if (kd.unpack_record(table, buf, &rkey, &value,
- m_verify_row_debug_checksums)) {
- return HA_ERR_INTERNAL_ERROR;
+ int err = kd.unpack_record(table, buf, &rkey, &value,
+ m_verify_row_debug_checksums);
+ if (err != HA_EXIT_SUCCESS) {
+ return err;
}
const enum icp_result icp_status= handler_index_cond_check(this);
if (icp_status == ICP_NO_MATCH) {
- if (move_forward)
- m_scan_it->Next();
- else
- m_scan_it->Prev();
+ rocksdb_smart_next(!move_forward, m_scan_it);
continue; /* Get the next (or prev) index tuple */
}
else if (icp_status == ICP_OUT_OF_RANGE || icp_status == ICP_ABORTED_BY_USER) {
@@ -6271,7 +7387,17 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward,
return HA_EXIT_SUCCESS;
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
+/**
+ @return
+ HA_ADMIN_OK OK
+ other HA_ADMIN error code
+*/
int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) {
DBUG_ENTER_FUNC();
@@ -6503,13 +7629,13 @@ bool ha_rocksdb::is_blind_delete_enabled()
Given a rowid (i.e. packed PK) as a parameter, get the record.
@return
- 0 - Ok
- other - HA_ERR error code.
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
*/
int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
- const uint rowid_size,
- const bool skip_lookup) {
+ const uint rowid_size, const bool skip_lookup,
+ const bool skip_ttl_check) {
DBUG_ENTER_FUNC();
DBUG_ASSERT(buf != nullptr);
@@ -6556,16 +7682,28 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
}
if (!s.IsNotFound() && !s.ok()) {
- DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def));
+ DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
+ m_table_handler));
}
found = !s.IsNotFound();
table->status = STATUS_NOT_FOUND;
if (found) {
+ /* If we found the record, but it's expired, pretend we didn't find it. */
+ if (!skip_ttl_check && m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(*m_pk_descr,
+ rocksdb::Slice(&m_retrieved_record.front(),
+ m_retrieved_record.size()),
+ tx->m_snapshot_timestamp)) {
+ DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
+ }
+
m_last_rowkey.copy((const char *)rowid, rowid_size, &my_charset_bin);
rc = convert_record_from_storage_format(&key_slice, buf);
- if (!rc)
+
+ if (!rc) {
table->status = 0;
+ }
} else {
/*
Note: we don't need to unlock the row. It is intentional that we keep
@@ -6577,6 +7715,11 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid,
DBUG_RETURN(rc);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::index_next(uchar *const buf) {
DBUG_ENTER_FUNC();
@@ -6592,6 +7735,11 @@ int ha_rocksdb::index_next(uchar *const buf) {
DBUG_RETURN(rc);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::index_prev(uchar *const buf) {
DBUG_ENTER_FUNC();
@@ -6623,6 +7771,8 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
else
m_scan_it->Prev();
}
+ rocksdb_skip_expired_records(*m_key_descr_arr[active_index], m_scan_it,
+ !move_forward);
rc = find_icp_matching_index_rec(move_forward, buf);
if (!rc)
rc = secondary_index_read(active_index, buf);
@@ -6631,6 +7781,11 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
DBUG_RETURN(rc);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::index_first(uchar *const buf) {
DBUG_ENTER_FUNC();
@@ -6644,6 +7799,11 @@ int ha_rocksdb::index_first(uchar *const buf) {
DBUG_RETURN(rc);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::index_last(uchar *const buf) {
DBUG_ENTER_FUNC();
@@ -6721,8 +7881,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
// Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this)
for (;;) {
- setup_scan_iterator(kd, &index_key, false, !kd.m_is_reverse_cf,
- Rdb_key_def::INDEX_NUMBER_SIZE);
+ setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE);
m_scan_it->Seek(index_key);
m_skip_scan_it_next_call = true;
@@ -6766,13 +7925,13 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
(n+1) <---- Doesn't exist, but would be here.
(n+1)-smth, or no value at all
- RocksDB's Iterator::Seek($val) seeks to "at $val or first value that's
- greater". We can't see to "(n)-ccc" directly, because we don't know what
+ RocksDB's Iterator::SeekForPrev($val) seeks to "at $val or last value that's
+ smaller". We can't seek to "(n)-ccc" directly, because we don't know what
is the value of 'ccc' (the biggest record with prefix (n)). Instead, we seek
to "(n+1)", which is the least possible value that's greater than any value
- in index #n. Then we step one record back.
+ in index #n.
- So, need to: it->Seek(n+1) || it->SeekToLast(); it->Prev();
+ So, need to: it->SeekForPrev(n+1)
A backward-ordered keyspace:
@@ -6783,7 +7942,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
( n ) <--- Doesn't exist, but would be here.
(n-1)-smth, or no value at all
- So, need to: it->Seek(n) || it->SeekToLast(); it->Prev();
+ So, need to: it->SeekForPrev(n)
*/
int ha_rocksdb::index_last_intern(uchar *const buf) {
@@ -6819,8 +7978,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) {
// Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this)
for (;;) {
- setup_scan_iterator(kd, &index_key, false, kd.m_is_reverse_cf,
- Rdb_key_def::INDEX_NUMBER_SIZE);
+ setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE);
m_scan_it->SeekForPrev(index_key);
m_skip_scan_it_next_call = false;
@@ -6971,16 +8129,10 @@ bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg,
is_hidden_pk(index, table_arg, tbl_def_arg);
}
-/*
- Formats the string and returns the column family name assignment part for a
- specific partition.
-*/
-const std::string ha_rocksdb::gen_cf_name_qualifier_for_partition(
- const std::string& prefix) {
- DBUG_ASSERT(!prefix.empty());
-
- return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_CF_NAME_QUALIFIER
- + RDB_PER_PARTITION_QUALIFIER_VALUE_SEP;
+uint ha_rocksdb::max_supported_key_part_length() const {
+ DBUG_ENTER_FUNC();
+ DBUG_RETURN(rocksdb_large_prefix ? MAX_INDEX_COL_LEN_LARGE
+ : MAX_INDEX_COL_LEN_SMALL);
}
const char *ha_rocksdb::get_key_name(const uint index,
@@ -7034,53 +8186,39 @@ const std::string ha_rocksdb::generate_cf_name(const uint index,
// `get_key_comment` can return `nullptr`, that's why this.
std::string key_comment = comment ? comment : "";
- // If table has partitions then we need to check if user has requested to
- // create a column family with a specific name on a per partition basis.
- if (table_arg->part_info != nullptr) {
- std::string partition_name = tbl_def_arg->base_partition();
- DBUG_ASSERT(!partition_name.empty());
-
- // Let's fetch the comment for a index and check if there's a custom key
- // name specified for a partition we are handling.
- std::vector<std::string> v = myrocks::parse_into_tokens(key_comment,
- RDB_QUALIFIER_SEP);
- std::string part_to_search = gen_cf_name_qualifier_for_partition(
- partition_name);
- DBUG_ASSERT(!part_to_search.empty());
-
- // Basic O(N) search for a matching assignment. At most we expect maybe
- // ten or so elements here.
- for (const auto &it : v) {
- if (it.substr(0, part_to_search.length()) == part_to_search) {
- // We found a prefix match. Try to parse it as an assignment.
- std::vector<std::string> tokens = myrocks::parse_into_tokens(it,
- RDB_PER_PARTITION_QUALIFIER_VALUE_SEP);
-
- // We found a custom name, it was in the form we expected it to be.
- // Return that instead of whatever we initially wanted to return. In
- // a case below the `foo` part will be returned to the caller.
- //
- // p3_cfname=foo
- //
- // If no value was specified then we'll return an empty string which
- // later gets translated into using a default CF.
- if (tokens.size() == 2) {
- *per_part_match_found = true;
- return tokens[1];
- } else {
- return "";
- }
- }
- }
+ std::string cf_name = Rdb_key_def::parse_comment_for_qualifier(
+ key_comment, table_arg, tbl_def_arg, per_part_match_found,
+ RDB_CF_NAME_QUALIFIER);
+ if (table_arg->part_info != nullptr && !*per_part_match_found) {
// At this point we tried to search for a custom CF name for a partition,
// but none was specified. Therefore default one will be used.
return "";
}
- return key_comment;
+ // If we didn't find any partitioned/non-partitioned qualifiers, return the
+ // comment itself. NOTE: this currently handles returning the cf name
+ // specified in the index comment in the case of no partitions, which doesn't
+ // use any qualifiers at the moment. (aka its a special case)
+ if (cf_name.empty() && !key_comment.empty()) {
+ return key_comment;
+ }
+
+ return cf_name;
}
+const std::string ha_rocksdb::get_table_comment(const TABLE *const table_arg) {
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(table_arg->s != nullptr);
+
+ return table_arg->s->comment.str;
+}
+
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::write_row(uchar *const buf) {
DBUG_ENTER_FUNC();
@@ -7143,9 +8281,10 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) {
row_info->new_pk_unpack_info = &m_pk_unpack_info;
- size = m_pk_descr->pack_record(table, m_pack_buffer, row_info->new_data,
- m_pk_packed_tuple,
- row_info->new_pk_unpack_info, false);
+ size =
+ m_pk_descr->pack_record(table, m_pack_buffer, row_info->new_data,
+ m_pk_packed_tuple, row_info->new_pk_unpack_info,
+ false, 0, 0, nullptr, &row_info->ttl_pk_offset);
} else if (row_info->old_data == nullptr) {
row_info->hidden_pk_id = update_hidden_pk_val();
size =
@@ -7157,8 +8296,9 @@ int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) {
*/
size = row_info->old_pk_slice.size();
memcpy(m_pk_packed_tuple, row_info->old_pk_slice.data(), size);
- if (read_hidden_pk_id_from_rowkey(&row_info->hidden_pk_id)) {
- return HA_ERR_INTERNAL_ERROR;
+ int err = read_hidden_pk_id_from_rowkey(&row_info->hidden_pk_id);
+ if (err) {
+ return err;
}
}
@@ -7217,8 +8357,8 @@ int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id,
get_for_update(row_info.tx, m_pk_descr->get_cf(), row_info.new_pk_slice,
&m_retrieved_record);
if (!s.ok() && !s.IsNotFound()) {
- return row_info.tx->set_status_error(table->in_use, s,
- *m_key_descr_arr[key_id], m_tbl_def);
+ return row_info.tx->set_status_error(
+ table->in_use, s, *m_key_descr_arr[key_id], m_tbl_def, m_table_handler);
}
*found = !s.IsNotFound();
@@ -7227,7 +8367,7 @@ int ha_rocksdb::check_and_lock_unique_pk(const uint &key_id,
int ha_rocksdb::check_and_lock_sk(const uint &key_id,
const struct update_row_info &row_info,
- bool *const found) const {
+ bool *const found) {
DBUG_ASSERT(found != nullptr);
*found = false;
@@ -7280,8 +8420,8 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id,
*/
if (row_info.old_data != nullptr) {
size = kd.pack_record(table, m_pack_buffer, row_info.old_data,
- m_sk_packed_tuple_old, nullptr, false,
- row_info.hidden_pk_id, user_defined_key_parts);
+ m_sk_packed_tuple_old, nullptr, false, 0,
+ user_defined_key_parts);
const rocksdb::Slice old_slice =
rocksdb::Slice((const char *)m_sk_packed_tuple_old, size);
@@ -7319,8 +8459,7 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id,
The bloom filter may need to be disabled for this lookup.
*/
const bool total_order_seek = !can_use_bloom_filter(
- ha_thd(), kd, new_slice, all_parts_used,
- is_ascending(*m_key_descr_arr[key_id], HA_READ_KEY_EXACT));
+ ha_thd(), kd, new_slice, all_parts_used);
const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache);
/*
@@ -7330,7 +8469,8 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id,
const rocksdb::Status s =
get_for_update(row_info.tx, kd.get_cf(), new_slice, &dummy_value);
if (!s.ok() && !s.IsNotFound()) {
- return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def);
+ return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
+ m_table_handler);
}
rocksdb::Iterator *const iter = row_info.tx->get_iterator(
@@ -7342,7 +8482,8 @@ int ha_rocksdb::check_and_lock_sk(const uint &key_id,
in the transaction.
*/
iter->Seek(new_slice);
- *found = !read_key_exact(kd, iter, all_parts_used, new_slice);
+ *found = !read_key_exact(kd, iter, all_parts_used, new_slice,
+ row_info.tx->m_snapshot_timestamp);
delete iter;
return HA_EXIT_SUCCESS;
@@ -7366,10 +8507,25 @@ int ha_rocksdb::check_uniqueness_and_lock(
rc = check_and_lock_sk(key_id, row_info, &found);
}
- if (rc != 0) {
+ if (rc != HA_EXIT_SUCCESS) {
return rc;
}
+ /*
+ If the pk key has ttl, we may need to pretend the row wasn't
+ found if it is already expired. The pk record is read into
+ m_retrieved_record by check_and_lock_unique_pk().
+ */
+ if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(*m_pk_descr,
+ rocksdb::Slice(&m_retrieved_record.front(),
+ m_retrieved_record.size()),
+ (row_info.tx->m_snapshot_timestamp
+ ? row_info.tx->m_snapshot_timestamp
+ : static_cast<int64_t>(std::time(nullptr))))) {
+ found = false;
+ }
+
if (found) {
/* There is a row with this key already, so error out. */
errkey = key_id;
@@ -7410,72 +8566,112 @@ int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg,
int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
const rocksdb::Slice &key,
- const rocksdb::Slice &value) {
- rocksdb::ColumnFamilyHandle *const cf = kd.get_cf();
+ const rocksdb::Slice &value, bool sort) {
+ DBUG_ENTER_FUNC();
+
+ rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
DBUG_ASSERT(cf != nullptr);
- if (m_sst_info == nullptr) {
- m_sst_info = std::make_shared<Rdb_sst_info>(
- rdb, m_table_handler->m_table_name, kd.get_name(), cf,
- rocksdb_db_options, THDVAR(ha_thd(), trace_sst_api));
- tx->start_bulk_load(this);
- m_bulk_load_tx = tx;
- }
+ int res = HA_EXIT_SUCCESS;
+
+ if (sort) {
+ GL_INDEX_ID kd_gl_id = kd.get_gl_index_id();
+ auto it = m_key_merge.find(kd_gl_id);
+ if (it == m_key_merge.end()) {
+ m_key_merge.emplace(
+ std::piecewise_construct, std::make_tuple(kd_gl_id),
+ std::make_tuple(
+ thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size),
+ THDVAR(ha_thd(), merge_combine_read_size),
+ THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf));
+ it = m_key_merge.find(kd_gl_id);
+ if ((res = it->second.init()) != 0) {
+ DBUG_RETURN(res);
+ }
- DBUG_ASSERT(m_sst_info != nullptr);
+ if (m_bulk_load_tx == nullptr) {
+ tx->start_bulk_load(this);
+ m_bulk_load_tx = tx;
+ }
+ }
+ res = it->second.add(key, value);
+ } else {
+ if (!m_sst_info) {
+ m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name,
+ kd.get_name(), cf, *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api)));
+ tx->start_bulk_load(this);
+ m_bulk_load_tx = tx;
+ }
+
+ DBUG_ASSERT(m_sst_info);
- int rc = m_sst_info->put(key, value);
- if (rc != 0) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "Failed to add a key to sst file writer(%s)", MYF(0),
- m_sst_info->error_message().c_str());
- rc = HA_ERR_INTERNAL_ERROR;
+ res = m_sst_info->put(key, value);
}
- return rc;
+ DBUG_RETURN(res);
}
int ha_rocksdb::finalize_bulk_load() {
- int rc = 0;
+ DBUG_ENTER_FUNC();
+
+ DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info,
+ m_bulk_load_tx != nullptr);
/* Skip if there are no possible ongoing bulk loads */
- if (m_sst_info == nullptr && m_bulk_load_tx == nullptr) {
- return rc;
+ if (m_key_merge.empty() && !m_sst_info && m_bulk_load_tx == nullptr) {
+ DBUG_RETURN(HA_EXIT_SUCCESS);
}
+ int res = HA_EXIT_SUCCESS;
+
RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex);
- /*
- We need this check because it's possible that m_sst_info has been
- flushed and cleared by another thread by the time the mutex has been
- acquired.
- */
- if (m_sst_info != nullptr) {
- rc = m_sst_info->commit();
- if (rc != 0) {
- /*
- Log the error immediately here in case the server crashes before
- mysql prints via my_printf_error.
- */
- sql_print_error("Failed to commit bulk loaded sst file to the "
- "data store (%s)",
- m_sst_info->error_message().c_str());
+ if (m_sst_info) {
+ res = m_sst_info->commit();
+ m_sst_info.reset();
+ }
- my_printf_error(ER_UNKNOWN_ERROR,
- "Failed to commit bulk loaded sst file to the "
- "data store (%s)",
- MYF(0), m_sst_info->error_message().c_str());
- rc = HA_ERR_INTERNAL_ERROR;
+ if (!m_key_merge.empty()) {
+ rocksdb::Slice merge_key;
+ rocksdb::Slice merge_val;
+ for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) {
+ const std::string &index_name =
+ ddl_manager.safe_find(it->first)->get_name();
+ Rdb_index_merge &rdb_merge = it->second;
+ Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name,
+ rdb_merge.get_cf(), *rocksdb_db_options,
+ THDVAR(ha_thd(), trace_sst_api));
+
+ while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) {
+ if ((res = sst_info.put(merge_key, merge_val)) != 0) {
+ break;
+ }
+ }
+ // res == -1 => finished ok; res > 0 => error
+ if (res <= 0) {
+ if ((res = sst_info.commit()) != 0) {
+ break;
+ }
+ }
}
+ m_key_merge.clear();
+
+ /*
+ Explicitly tell jemalloc to clean up any unused dirty pages at this point.
+ See https://reviews.facebook.net/D63723 for more details.
+ */
+ purge_all_jemalloc_arenas();
+ }
- m_sst_info = nullptr;
+ if (m_bulk_load_tx != nullptr) {
m_bulk_load_tx->end_bulk_load(this);
m_bulk_load_tx = nullptr;
}
RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex);
- return rc;
+ DBUG_RETURN(res);
}
int ha_rocksdb::update_pk(const Rdb_key_def &kd,
@@ -7483,14 +8679,23 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
const bool &pk_changed) {
const uint key_id = kd.get_keyno();
const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def);
- if (!hidden_pk && pk_changed) {
- /*
- The old key needs to be deleted.
- */
+ ulonglong bytes_written = 0;
+
+ /*
+ If the PK has changed, or if this PK uses single deletes and this is an
+ update, the old key needs to be deleted. In the single delete case, it
+ might be possible to have this sequence of keys: PUT(X), PUT(X), SD(X),
+ resulting in the first PUT(X) showing up.
+ */
+ if (!hidden_pk && (pk_changed || ((row_info.old_pk_slice.size() > 0) &&
+ can_use_single_delete(key_id)))) {
const rocksdb::Status s = delete_or_singledelete(
key_id, row_info.tx, kd.get_cf(), row_info.old_pk_slice);
if (!s.ok()) {
- return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def);
+ return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def,
+ m_table_handler);
+ } else {
+ bytes_written = row_info.old_pk_slice.size();
}
}
@@ -7498,30 +8703,26 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
update_auto_incr_val();
}
+ int rc = HA_EXIT_SUCCESS;
rocksdb::Slice value_slice;
- convert_record_to_storage_format(row_info.new_pk_slice,
- row_info.new_pk_unpack_info, &value_slice);
+ /* Prepare the new record to be written into RocksDB */
+ if ((rc = convert_record_to_storage_format(row_info, &value_slice))) {
+ return rc;
+ }
- int rc = 0;
const auto cf = m_pk_descr->get_cf();
if (rocksdb_enable_bulk_load_api && THDVAR(table->in_use, bulk_load) &&
!hidden_pk) {
/*
Write the primary key directly to an SST file using an SstFileWriter
*/
- rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice);
- } else if (row_info.skip_unique_check) {
+ rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice,
+ THDVAR(table->in_use, bulk_load_allow_unsorted));
+ } else if (row_info.skip_unique_check || row_info.tx->m_ddl_transaction) {
/*
It is responsibility of the user to make sure that the data being
inserted doesn't violate any unique keys.
*/
- row_info.tx->get_blind_write_batch()->Put(cf, row_info.new_pk_slice,
- value_slice);
- } else if (row_info.tx->m_ddl_transaction) {
- /*
- DDL statement must check for unique key conflicts. For example:
- ALTER TABLE tbl DROP PRIMARY KEY, ADD PRIMARY KEY(non_unique_column)
- */
row_info.tx->get_indexed_write_batch()->Put(cf, row_info.new_pk_slice,
value_slice);
} else {
@@ -7533,11 +8734,15 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd,
rc = HA_ERR_FOUND_DUPP_KEY;
} else {
rc = row_info.tx->set_status_error(table->in_use, s, *m_pk_descr,
- m_tbl_def);
+ m_tbl_def, m_table_handler);
}
}
}
+ if (rc == HA_EXIT_SUCCESS) {
+ row_info.tx->update_bytes_written(
+ bytes_written + row_info.new_pk_slice.size() + value_slice.size());
+ }
return rc;
}
@@ -7551,24 +8756,31 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
rocksdb::Slice old_key_slice;
const uint key_id = kd.get_keyno();
+
+ ulonglong bytes_written = 0;
+
/*
- Can skip updating this key if none of the key fields have changed.
+ Can skip updating this key if none of the key fields have changed and, if
+ this table has TTL, the TTL timestamp has not changed.
*/
- if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) {
+ if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id) &&
+ (!kd.has_ttl() || !m_ttl_bytes_updated)) {
return HA_EXIT_SUCCESS;
}
const bool store_row_debug_checksums = should_store_row_debug_checksums();
- new_packed_size = kd.pack_record(
- table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple,
- &m_sk_tails, store_row_debug_checksums, row_info.hidden_pk_id);
+ new_packed_size =
+ kd.pack_record(table_arg, m_pack_buffer, row_info.new_data,
+ m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums,
+ row_info.hidden_pk_id, 0, nullptr, nullptr, m_ttl_bytes);
if (row_info.old_data != nullptr) {
// The old value
old_packed_size = kd.pack_record(
table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old,
- &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id);
+ &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0,
+ nullptr, nullptr, m_ttl_bytes);
/*
Check if we are going to write the same value. This can happen when
@@ -7586,6 +8798,7 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
*/
if (old_packed_size == new_packed_size &&
m_sk_tails_old.get_current_pos() == m_sk_tails.get_current_pos() &&
+ !(kd.has_ttl() && m_ttl_bytes_updated) &&
memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, old_packed_size) ==
0 &&
memcmp(m_sk_tails_old.ptr(), m_sk_tails.ptr(),
@@ -7604,23 +8817,8 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
row_info.tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
old_key_slice);
- }
- /*
- We're writing a new entry for secondary key. We can skip locking; we
- should write to
- - WriteBatchWithIndex normally (so that transaction sees the new row)
- - non-indexed WriteBatch, when we don't need to see the new row:
- = when doing a DDL operation and writing to a non-unique index, or
- = when doing a bulk load
- */
- rocksdb::WriteBatchBase *write_batch;
- if ((row_info.tx->m_ddl_transaction &&
- !(table_arg->key_info[key_id].flags & HA_NOSAME)) ||
- row_info.skip_unique_check) {
- write_batch = row_info.tx->get_blind_write_batch();
- } else {
- write_batch = row_info.tx->get_indexed_write_batch();
+ bytes_written = old_key_slice.size();
}
new_key_slice = rocksdb::Slice(
@@ -7629,7 +8827,11 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
rocksdb::Slice(reinterpret_cast<const char *>(m_sk_tails.ptr()),
m_sk_tails.get_current_pos());
- write_batch->Put(kd.get_cf(), new_key_slice, new_value_slice);
+ row_info.tx->get_indexed_write_batch()->Put(kd.get_cf(), new_key_slice,
+ new_value_slice);
+
+ row_info.tx->update_bytes_written(bytes_written + new_key_slice.size() +
+ new_value_slice.size());
return HA_EXIT_SUCCESS;
}
@@ -7638,15 +8840,20 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info,
const bool &pk_changed) {
int rc;
+ // The PK must be updated first to pull out the TTL value.
+ rc = update_pk(*m_pk_descr, row_info, pk_changed);
+ if (rc != HA_EXIT_SUCCESS) {
+ return rc;
+ }
+
+ // Update the remaining indexes.
for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) {
- const Rdb_key_def &kd = *m_key_descr_arr[key_id];
if (is_pk(key_id, table, m_tbl_def)) {
- rc = update_pk(kd, row_info, pk_changed);
- } else {
- rc = update_sk(table, kd, row_info);
+ continue;
}
- if (rc != 0) {
+ rc = update_sk(table, *m_key_descr_arr[key_id], row_info);
+ if (rc != HA_EXIT_SUCCESS) {
return rc;
}
}
@@ -7683,7 +8890,7 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
Get the new row key into row_info.new_pk_slice
*/
int rc = get_pk_for_update(&row_info);
- if (rc != 0) {
+ if (rc != HA_EXIT_SUCCESS) {
DBUG_RETURN(rc);
}
@@ -7693,7 +8900,7 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
keys. Also lock the appropriate key values.
*/
rc = check_uniqueness_and_lock(row_info, &pk_changed);
- if (rc != 0) {
+ if (rc != HA_EXIT_SUCCESS) {
DBUG_RETURN(rc);
}
}
@@ -7707,12 +8914,18 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
cannot be easily removed without rolling back the entire transaction.
*/
rc = update_indexes(row_info, pk_changed);
- if (rc != 0) {
+ if (rc != HA_EXIT_SUCCESS) {
DBUG_RETURN(rc);
}
+ if (old_data != nullptr) {
+ row_info.tx->incr_update_count();
+ } else {
+ row_info.tx->incr_insert_count();
+ }
+
if (do_bulk_commit(row_info.tx)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
DBUG_RETURN(HA_EXIT_SUCCESS);
@@ -7725,7 +8938,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
rocksdb::Slice *const slice,
const bool use_all_keys,
- const bool is_ascending,
const uint eq_cond_len) {
DBUG_ASSERT(slice != nullptr);
DBUG_ASSERT(slice->size() >= eq_cond_len);
@@ -7735,7 +8947,7 @@ void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd,
bool skip_bloom = true;
const rocksdb::Slice eq_cond(slice->data(), eq_cond_len);
- if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys, is_ascending)) {
+ if (can_use_bloom_filter(ha_thd(), kd, eq_cond, use_all_keys)) {
skip_bloom = false;
}
@@ -7806,6 +9018,11 @@ void ha_rocksdb::setup_iterator_for_rnd_scan() {
m_skip_scan_it_next_call = true;
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::rnd_init(bool scan) {
DBUG_ENTER_FUNC();
@@ -7827,6 +9044,11 @@ int ha_rocksdb::rnd_init(bool scan) {
DBUG_RETURN(HA_EXIT_SUCCESS);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::rnd_next(uchar *const buf) {
DBUG_ENTER_FUNC();
@@ -7906,6 +9128,13 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
*/
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete");
+
+ if (m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(*m_pk_descr, m_scan_it->value(),
+ tx->m_snapshot_timestamp)) {
+ continue;
+ }
+
const rocksdb::Status s =
get_for_update(tx, m_pk_descr->get_cf(), key, &m_retrieved_record);
if (s.IsNotFound() &&
@@ -7918,8 +9147,8 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
}
if (!s.ok()) {
- DBUG_RETURN(
- tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def));
+ DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr,
+ m_tbl_def, m_table_handler));
}
// If we called get_for_update() use the value from that call not from
@@ -7930,6 +9159,14 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
} else {
// Use the value from the iterator
rocksdb::Slice value = m_scan_it->value();
+
+ if (m_pk_descr->has_ttl() &&
+ should_hide_ttl_rec(
+ *m_pk_descr, value,
+ get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
+ continue;
+ }
+
m_last_rowkey.copy(key.data(), key.size(), &my_charset_bin);
rc = convert_record_from_storage_format(&key, &value, buf);
}
@@ -7957,6 +9194,11 @@ int ha_rocksdb::rnd_end() {
DBUG_RETURN(HA_EXIT_SUCCESS);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::index_init(uint idx, bool sorted) {
DBUG_ENTER_FUNC();
@@ -7965,6 +9207,10 @@ int ha_rocksdb::index_init(uint idx, bool sorted) {
setup_read_decoders();
+ if (!m_keyread_only) {
+ m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap);
+ }
+
// If m_lock_rows is not RDB_LOCK_NONE then we will be doing a get_for_update
// when accessing the index, so don't acquire the snapshot right away.
// Otherwise acquire the snapshot immediately.
@@ -7975,18 +9221,28 @@ int ha_rocksdb::index_init(uint idx, bool sorted) {
DBUG_RETURN(HA_EXIT_SUCCESS);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+*/
int ha_rocksdb::index_end() {
DBUG_ENTER_FUNC();
release_scan_iterator();
+ bitmap_free(&m_lookup_bitmap);
+
active_index = MAX_KEY;
in_range_check_pushed_down = FALSE;
DBUG_RETURN(HA_EXIT_SUCCESS);
}
-
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::truncate() {
DBUG_ENTER_FUNC();
@@ -7995,9 +9251,7 @@ int ha_rocksdb::truncate() {
m_tbl_def->m_auto_incr_val = 1;
m_tbl_def->m_hidden_pk_val = 1;
- remove_rows(m_tbl_def);
-
- DBUG_RETURN(HA_EXIT_SUCCESS);
+ DBUG_RETURN(remove_rows(m_tbl_def));
}
/*
@@ -8007,6 +9261,10 @@ int ha_rocksdb::truncate() {
The caller guarantees table buf points to the row that was just read.
The row is either table->record[0] or table->record[1].
(Check out InnoDB: row_update_for_mysql() has "UT_NOT_USED(mysql_rec)"
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
*/
int ha_rocksdb::delete_row(const uchar *const buf) {
DBUG_ENTER_FUNC();
@@ -8017,18 +9275,25 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
rocksdb::Slice key_slice(m_last_rowkey.ptr(), m_last_rowkey.length());
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
+ ulonglong bytes_written = 0;
const uint index = pk_index(table, m_tbl_def);
rocksdb::Status s =
delete_or_singledelete(index, tx, m_pk_descr->get_cf(), key_slice);
if (!s.ok()) {
- DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def));
+ DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
+ m_table_handler));
+ } else {
+ bytes_written = key_slice.size();
}
longlong hidden_pk_id = 0;
- if (m_tbl_def->m_key_count > 1 && has_hidden_pk(table) &&
- read_hidden_pk_id_from_rowkey(&hidden_pk_id))
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ if (m_tbl_def->m_key_count > 1 && has_hidden_pk(table)) {
+ int err = read_hidden_pk_id_from_rowkey(&hidden_pk_id);
+ if (err) {
+ DBUG_RETURN(err);
+ }
+ }
// Delete the record for every secondary index
for (uint i = 0; i < m_tbl_def->m_key_count; i++) {
@@ -8042,16 +9307,20 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
/* Deleting on secondary key doesn't need any locks: */
tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(),
secondary_key_slice);
+ bytes_written += secondary_key_slice.size();
}
}
+ tx->incr_delete_count();
+
if (do_bulk_commit(tx)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
#ifdef MARIAROCKS_NOT_YET
stats.rows_deleted++;
#endif
update_row_stats(ROWS_DELETED);
+ tx->update_bytes_written(bytes_written);
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -8085,11 +9354,16 @@ void ha_rocksdb::update_stats(void) {
DBUG_VOID_RETURN;
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ HA_EXIT_FAILURE Error
+*/
int ha_rocksdb::info(uint flag) {
DBUG_ENTER_FUNC();
if (!table)
- return HA_EXIT_FAILURE;
+ DBUG_RETURN(HA_EXIT_FAILURE);
if (flag & HA_STATUS_VARIABLE) {
/*
@@ -8127,19 +9401,43 @@ int ha_rocksdb::info(uint flag) {
uint64_t sz = 0;
uint8_t include_flags = rocksdb::DB::INCLUDE_FILES;
// recompute SST files stats only if records count is 0
- if (stats.records == 0) {
+ if (stats.records == 0) {
rdb->GetApproximateSizes(m_pk_descr->get_cf(), &r, 1, &sz,
include_flags);
stats.records+= sz/ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE;
stats.data_file_length+= sz;
}
- // Second, compute memtable stats
- uint64_t memtableCount;
- uint64_t memtableSize;
- rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
- &memtableCount, &memtableSize);
- stats.records += memtableCount;
- stats.data_file_length += memtableSize;
+ // Second, compute memtable stats. This call is expensive, so cache
+ // values computed for some time.
+ uint64_t cachetime = rocksdb_force_compute_memtable_stats_cachetime;
+ uint64_t time = (cachetime == 0) ? 0 : my_interval_timer() / 1000;
+ if (cachetime == 0 ||
+ time > m_table_handler->m_mtcache_last_update + cachetime) {
+ uint64_t memtableCount;
+ uint64_t memtableSize;
+
+ rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r,
+ &memtableCount, &memtableSize);
+
+ // Atomically update all of these fields at the same time
+ if (cachetime > 0) {
+ if (m_table_handler->m_mtcache_lock.fetch_add(
+ 1, std::memory_order_acquire) == 0) {
+ m_table_handler->m_mtcache_count = memtableCount;
+ m_table_handler->m_mtcache_size = memtableSize;
+ m_table_handler->m_mtcache_last_update = time;
+ }
+ m_table_handler->m_mtcache_lock.fetch_sub(1,
+ std::memory_order_release);
+ }
+
+ stats.records += memtableCount;
+ stats.data_file_length += memtableSize;
+ } else {
+ // Cached data is still valid, so use it instead
+ stats.records += m_table_handler->m_mtcache_count;
+ stats.data_file_length += m_table_handler->m_mtcache_size;
+ }
if (rocksdb_debug_optimizer_n_rows > 0)
stats.records = rocksdb_debug_optimizer_n_rows;
@@ -8242,6 +9540,11 @@ void ha_rocksdb::position(const uchar *const record) {
DBUG_VOID_RETURN;
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::rnd_pos(uchar *const buf, uchar *const pos) {
DBUG_ENTER_FUNC();
@@ -8254,7 +9557,7 @@ int ha_rocksdb::rnd_pos(uchar *const buf, uchar *const pos) {
len = m_pk_descr->key_length(table,
rocksdb::Slice((const char *)pos, ref_length));
if (len == size_t(-1)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR); /* Data corruption? */
+ DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); /* Data corruption? */
}
rc = get_row_by_rowid(buf, pos, len);
@@ -8303,6 +9606,11 @@ void ha_rocksdb::calc_updated_indexes() {
}
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
+*/
int ha_rocksdb::update_row(const uchar *const old_data, uchar *const new_data) {
DBUG_ENTER_FUNC();
@@ -8439,29 +9747,34 @@ const char *ha_rocksdb::thd_rocksdb_tmpdir() {
return (tmp_dir);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (cannot be SE-specific)
+*/
int ha_rocksdb::external_lock(THD *const thd, int lock_type) {
DBUG_ENTER_FUNC();
DBUG_ASSERT(thd != nullptr);
- int res = 0;
+ int res = HA_EXIT_SUCCESS;
int binlog_format = my_core::thd_binlog_format(thd);
bool unsafe_for_binlog = THDVAR(ha_thd(), unsafe_for_binlog);
if (lock_type == F_WRLCK && !thd->rgi_slave && !unsafe_for_binlog &&
binlog_format != BINLOG_FORMAT_ROW &&
binlog_format != BINLOG_FORMAT_UNSPEC &&
my_core::thd_binlog_filter_ok(thd)) {
- my_printf_error(
- ER_UNKNOWN_ERROR,
- "Can't execute updates on master with binlog_format != ROW.", MYF(0));
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ my_error(ER_REQUIRE_ROW_BINLOG_FORMAT, MYF(0));
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
}
if (lock_type == F_UNLCK) {
Rdb_transaction *const tx = get_or_create_tx(thd);
tx->io_perf_end_and_record(&m_io_perf);
- if (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
+ tx->m_n_mysql_tables_in_use--;
+ if (tx->m_n_mysql_tables_in_use == 0 &&
+ !my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
/*
Do like InnoDB: when we get here, it's time to commit a
single-statement transaction.
@@ -8470,18 +9783,16 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) {
for each of them, but that's ok because non-first tx->commit() calls
will be no-ops.
*/
- res = tx->commit_or_rollback();
+ if (tx->commit_or_rollback()) {
+ res = HA_ERR_INTERNAL_ERROR;
+ }
}
} else {
if (my_core::thd_tx_isolation(thd) < ISO_READ_COMMITTED ||
my_core::thd_tx_isolation(thd) > ISO_REPEATABLE_READ) {
- my_printf_error(
- ER_UNKNOWN_ERROR, "MyRocks supports only READ COMMITTED and "
- "REPEATABLE READ isolation levels. "
- "Please change from current isolation "
- "level %s",
- MYF(0), tx_isolation_names[my_core::thd_tx_isolation(thd)]);
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ my_error(ER_ISOLATION_MODE_NOT_SUPPORTED, MYF(0),
+ tx_isolation_names[my_core::thd_tx_isolation(thd)]);
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
}
/*
It's nice to do the following on start of every statement. The problem
@@ -8499,25 +9810,15 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) {
thd->lex->sql_command == SQLCOM_REPLACE) &&
(thd->lex->duplicates == DUP_REPLACE ||
thd->lex->duplicates == DUP_UPDATE)) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "When unique checking is disabled in MyRocks, INSERT,"
- "UPDATE,LOAD statements with clauses that update or "
- "replace the key (i.e. "
- "INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not "
- "allowed. Query: %s",
- MYF(0), thd->query());
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ my_error(ER_ON_DUPLICATE_DISABLED, MYF(0), thd->query());
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
}
}
if (lock_type == F_WRLCK) {
if (tx->is_tx_read_only()) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "Can't execute updates when you started a transaction "
- "with START TRANSACTION WITH CONSISTENT [ROCKSDB] "
- "SNAPSHOT.",
- MYF(0));
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ my_error(ER_UPDATES_WITH_CONSISTENT_SNAPSHOT, MYF(0));
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
}
/*
@@ -8535,6 +9836,7 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) {
tx->m_ddl_transaction = true;
}
}
+ tx->m_n_mysql_tables_in_use++;
rocksdb_register_tx(rocksdb_hton, thd, tx);
tx->io_perf_start(&m_io_perf);
}
@@ -8550,6 +9852,9 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) {
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders.
</quote>
+
+ @return
+ HA_EXIT_SUCCESS OK
*/
int ha_rocksdb::start_stmt(THD *const thd, thr_lock_type lock_type) {
@@ -8608,7 +9913,7 @@ static bool is_myrocks_index_empty(
uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0};
rdb_netbuf_store_uint32(key_buf, index_id);
const rocksdb::Slice key =
- rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf));
+ rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf));
std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(read_opts, cfh));
rocksdb_smart_seek(is_reverse_cf, it.get(), key);
if (!it->Valid()) {
@@ -8683,10 +9988,6 @@ void Rdb_drop_index_thread::run() {
uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0,
is_reverse_cf ? 0 : 1);
- rocksdb::CompactRangeOptions compact_range_options;
- compact_range_options.bottommost_level_compaction =
- rocksdb::BottommostLevelCompaction::kForce;
- compact_range_options.exclusive_manual_compaction = false;
rocksdb::Status status = DeleteFilesInRange(rdb->GetBaseDB(), cfh,
&range.start, &range.limit);
if (!status.ok()) {
@@ -8695,7 +9996,7 @@ void Rdb_drop_index_thread::run() {
}
rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD);
}
- status = rdb->CompactRange(compact_range_options, cfh, &range.start,
+ status = rdb->CompactRange(getCompactRangeOptions(), cfh, &range.start,
&range.limit);
if (!status.ok()) {
if (status.IsShutdownInProgress()) {
@@ -8723,7 +10024,7 @@ Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) {
DBUG_ASSERT(tablename != nullptr);
std::string str;
- if (rdb_normalize_tablename(tablename, &str)) {
+ if (rdb_normalize_tablename(tablename, &str) != HA_EXIT_SUCCESS) {
// We were not passed table name?
DBUG_ASSERT(0);
return nullptr;
@@ -8737,6 +10038,10 @@ Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) {
this->table==nullptr, pk_key_descr==nullptr, etc.
tablename points to line in form "./dbname/tablename".
+
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (can be SE-specific)
*/
int ha_rocksdb::delete_table(const char *const tablename) {
@@ -8758,24 +10063,27 @@ int ha_rocksdb::delete_table(const char *const tablename) {
the persistent data dictionary).
*/
ddl_manager.remove(tbl, batch, true);
- if (dict_manager.commit(batch))
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ int err = dict_manager.commit(batch);
+ if (err) {
+ DBUG_RETURN(err);
+ }
rdb_drop_idx_thread.signal();
DBUG_RETURN(HA_EXIT_SUCCESS);
}
-void ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
+int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
const rocksdb::WriteOptions wo =
rdb_get_rocksdb_write_options(handler::ha_thd());
rocksdb::ReadOptions opts;
opts.total_order_seek = true;
- rocksdb::Iterator *const it = rdb->NewIterator(opts);
+ Rdb_transaction *const tx = get_or_create_tx(table->in_use);
char key_buf[MAX_KEY_LENGTH];
uint key_len;
+ ulonglong bytes_written = 0;
/*
Remove all records in each index.
(This is is not crash-safe, but it doesn't matter, because bulk row
@@ -8784,6 +10092,9 @@ void ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
for (uint i = 0; i < tbl->m_key_count; i++) {
const Rdb_key_def &kd = *tbl->m_key_descr_arr[i];
kd.get_infimum_key(reinterpret_cast<uchar *>(key_buf), &key_len);
+ rocksdb::ColumnFamilyHandle *cf = kd.get_cf();
+
+ std::unique_ptr<rocksdb::Iterator> it(rdb->NewIterator(opts, cf));
const rocksdb::Slice table_key(key_buf, key_len);
it->Seek(table_key);
@@ -8793,78 +10104,85 @@ void ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
break;
}
- if (can_use_single_delete(i))
- rdb->SingleDelete(wo, key);
- else
- rdb->Delete(wo, key);
+ rocksdb::Status s;
+ if (can_use_single_delete(i)) {
+ s = rdb->SingleDelete(wo, cf, key);
+ } else {
+ s = rdb->Delete(wo, cf, key);
+ }
+
+ if (!s.ok()) {
+ return tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
+ m_table_handler);
+ }
+ bytes_written += key.size();
it->Next();
}
}
- delete it;
+
+ tx->update_bytes_written(bytes_written);
+
+ return HA_EXIT_SUCCESS;
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code (cannot be SE-specific)
+*/
int ha_rocksdb::rename_table(const char *const from, const char *const to) {
DBUG_ENTER_FUNC();
DBUG_ASSERT(from != nullptr);
DBUG_ASSERT(to != nullptr);
- bool write_err = false;
std::string from_str;
std::string to_str;
std::string from_db;
std::string to_db;
- if (rdb_normalize_tablename(from, &from_str) ||
- rdb_split_normalized_tablename(from_str, &from_db)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ int rc = rdb_normalize_tablename(from, &from_str);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ rc = rdb_split_normalized_tablename(from_str, &from_db);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
+ }
+
+ rc = rdb_normalize_tablename(to, &to_str);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
}
- if (rdb_normalize_tablename(to, &to_str) ||
- rdb_split_normalized_tablename(to_str, &to_db)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ rc = rdb_split_normalized_tablename(to_str, &to_db);
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
}
// If the user changed the database part of the name then validate that the
// 'to' database exists.
if (from_db != to_db && !rdb_database_exists(to_db)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ // If we return a RocksDB specific error code here we get
+ // "error: 206 - Unknown error 206". InnoDB gets
+ // "error -1 - Unknown error -1" so let's match them.
+ DBUG_RETURN(-1);
}
- /* Check if any index has a per-index column family */
- {
- /*
- We can't check this directly, because
- 1. the object that referred to the old table has been close()d.
- 2. this ha_rocksdb object has no connection to any table at all, it has
- been just created with (TABLE*)nullptr.
-
- So, we have to find the old table in the ddl_manager (it's there because
- it has been opened by this statement), and check is_auto_cf there.
- */
- Rdb_tbl_def *tdef;
- if (!(tdef = ddl_manager.find(from_str)))
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
-
- for (uint i = 0; i < tdef->m_key_count; i++) {
- DBUG_ASSERT(tdef->m_key_descr_arr != nullptr);
-
- if (tdef->m_key_descr_arr[i]->m_is_auto_cf) {
- my_error(ER_NOT_SUPPORTED_YET, MYF(0),
- "ALTER TABLE on table with per-index CF");
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
- }
- }
- }
+ DBUG_EXECUTE_IF("gen_sql_table_name", to_str = to_str + "#sql-test";);
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
dict_manager.lock();
- write_err =
- ddl_manager.rename(from_str, to_str, batch) || dict_manager.commit(batch);
+ if (ddl_manager.rename(from_str, to_str, batch)) {
+ rc = HA_ERR_NO_SUCH_TABLE;
+ } else {
+ rc = dict_manager.commit(batch);
+ }
dict_manager.unlock();
- DBUG_RETURN(write_err ? HA_ERR_INTERNAL_ERROR : 0);
+ DBUG_RETURN(rc);
}
/**
@@ -8888,6 +10206,10 @@ bool ha_rocksdb::check_if_incompatible_data(HA_CREATE_INFO *const info,
DBUG_RETURN(COMPATIBLE_DATA_NO);
}
+/**
+ @return
+ HA_EXIT_SUCCESS OK
+*/
int ha_rocksdb::extra(enum ha_extra_function operation) {
DBUG_ENTER_FUNC();
@@ -9038,6 +10360,10 @@ void ha_rocksdb::update_create_info(HA_CREATE_INFO *const create_info) {
the first row of the next index id. When using reverse order
column family, the first row of the next index id should be
the last row of the previous index id.
+
+ @return
+ HA_ADMIN_OK OK
+ other HA_ADMIN error code
*/
int ha_rocksdb::optimize(THD *const thd, HA_CHECK_OPT *const check_opt) {
DBUG_ENTER_FUNC();
@@ -9045,20 +10371,18 @@ int ha_rocksdb::optimize(THD *const thd, HA_CHECK_OPT *const check_opt) {
DBUG_ASSERT(thd != nullptr);
DBUG_ASSERT(check_opt != nullptr);
- int rc = 0;
for (uint i = 0; i < table->s->keys; i++) {
uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2];
auto range = get_range(i, buf);
- if (!rdb->CompactRange(rocksdb::CompactRangeOptions(),
- m_key_descr_arr[i]->get_cf(), &range.start,
- &range.limit)
- .ok()) {
- rc = 1;
- break;
+ const rocksdb::Status s = rdb->CompactRange(getCompactRangeOptions(),
+ m_key_descr_arr[i]->get_cf(),
+ &range.start, &range.limit);
+ if (!s.ok()) {
+ DBUG_RETURN(rdb_error_to_mysql(s));
}
}
- DBUG_RETURN(rc);
+ DBUG_RETURN(HA_EXIT_SUCCESS);
}
int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
@@ -9096,8 +10420,10 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
const auto status = rdb->GetPropertiesOfTablesInRange(
it.first, &it.second[0], it.second.size(), &props);
DBUG_ASSERT(props.size() >= old_size);
- if (!status.ok())
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ if (!status.ok()) {
+ DBUG_RETURN(
+ rdb_error_to_mysql(status, "Could not access RocksDB properties"));
+ }
}
int num_sst = 0;
@@ -9143,14 +10469,19 @@ int ha_rocksdb::calculate_stats(const TABLE *const table_arg, THD *const thd,
DBUG_RETURN(HA_EXIT_SUCCESS);
}
+/*
+ @return
+ HA_ADMIN_OK OK
+ other HA_ADMIN error code
+*/
int ha_rocksdb::analyze(THD *const thd, HA_CHECK_OPT *const check_opt) {
DBUG_ENTER_FUNC();
- if (!table) {
- DBUG_RETURN(HA_EXIT_FAILURE);
+ if (table && calculate_stats(table, thd, check_opt) != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(HA_ADMIN_FAILED);
}
- DBUG_RETURN(calculate_stats(table, thd, check_opt));
+ DBUG_RETURN(HA_ADMIN_OK);
}
void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
@@ -9178,27 +10509,61 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
off = 1;
}
+ longlong new_val;
+
// Local variable reference to simplify code below
std::atomic<longlong> &auto_incr = m_tbl_def->m_auto_incr_val;
if (inc == 1 && off == 1) {
// Optimization for the standard case where we are always simply
// incrementing from the last position
- *first_value = auto_incr++;
+
+      // Use CAS operation in a loop to make sure we atomically get the next
+      // auto increment value while ensuring that we don't wrap around to a negative
+ // number.
+ new_val = auto_incr;
+ while (new_val != std::numeric_limits<longlong>::max()) {
+ if (auto_incr.compare_exchange_weak(new_val, new_val + 1)) {
+ break;
+ }
+ }
} else {
// The next value can be more complicated if either `inc` or 'off' is not 1
- longlong new_val;
longlong last_val = auto_incr;
// Loop until we can correctly update the atomic value
do {
+ if (((last_val - off) / inc) ==
+ (std::numeric_limits<longlong>::max() - off) / inc) {
+ // The 'last_val' value is already equal to or larger than the largest
+ // value in the sequence. Continuing would wrap around (technically
+ // the behavior would be undefined). What should we do?
+ // We could:
+ // 1) set the new value to the last possible number in our sequence
+ // as described above. The problem with this is that this
+ // number could be smaller than a value in an existing row.
+ // 2) set the new value to the largest possible number. This number
+ // may not be in our sequence, but it is guaranteed to be equal
+ // to or larger than any other value already inserted.
+ //
+        // For now I'm going to take option 2.
+ new_val = std::numeric_limits<longlong>::max();
+ auto_incr = new_val; // Store the largest value into auto_incr
+ break;
+ }
+
// Calculate the next value in the auto increment series:
// offset + N * increment
// where N is 0, 1, 2, ...
//
// For further information please visit:
// http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html
- new_val = ((last_val + (inc - off) - 1) / inc) * inc + off;
+ //
+ // The following is confusing so here is an explanation:
+ // To get the next number in the sequence above you subtract out
+ // the offset, calculate the next sequence (N * increment) and then add
+ // the offset back in.
+ new_val = (((last_val - off) + (inc - 1)) / inc) * inc + off;
// Attempt to store the new value (plus 1 since m_auto_incr_val contains
// the next available value) into the atomic value. If the current
@@ -9206,10 +10571,9 @@ void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc,
// we will repeat the loop (`last_val` will automatically get updated
// with the current value).
} while (!auto_incr.compare_exchange_weak(last_val, new_val + 1));
-
- *first_value = new_val;
}
+ *first_value = new_val;
*nb_reserved_values = 1;
}
@@ -9268,47 +10632,6 @@ class Item *ha_rocksdb::idx_cond_push(uint keyno, class Item *const idx_cond) {
DBUG_RETURN(nullptr);
}
-/**
- Checking if an index is used for ascending scan or not
-
- @detail
- Currently RocksDB does not support bloom filter for
- prefix lookup + descending scan, but supports bloom filter for
- prefix lookup + ascending scan. This function returns true if
- the scan pattern is absolutely ascending.
- @param kd
- @param find_flag
-*/
-bool ha_rocksdb::is_ascending(const Rdb_key_def &kd,
- enum ha_rkey_function find_flag) const {
- bool is_ascending;
- switch (find_flag) {
- case HA_READ_KEY_EXACT: {
- is_ascending = !kd.m_is_reverse_cf;
- break;
- }
- case HA_READ_PREFIX: {
- is_ascending = true;
- break;
- }
- case HA_READ_KEY_OR_NEXT:
- case HA_READ_AFTER_KEY: {
- is_ascending = !kd.m_is_reverse_cf;
- break;
- }
- case HA_READ_KEY_OR_PREV:
- case HA_READ_BEFORE_KEY:
- case HA_READ_PREFIX_LAST:
- case HA_READ_PREFIX_LAST_OR_PREV: {
- is_ascending = kd.m_is_reverse_cf;
- break;
- }
- default:
- is_ascending = false;
- }
- return is_ascending;
-}
-
/*
Checks if inplace alter is supported for a given operation.
*/
@@ -9374,8 +10697,6 @@ bool ha_rocksdb::prepare_inplace_alter_table(
DBUG_ASSERT(altered_table != nullptr);
DBUG_ASSERT(ha_alter_info != nullptr);
- const uint n_dropped_keys = ha_alter_info->index_drop_count;
- const uint n_added_keys = ha_alter_info->index_add_count;
const uint old_n_keys = m_tbl_def->m_key_count;
uint new_n_keys = altered_table->s->keys;
@@ -9383,6 +10704,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
new_n_keys += 1;
}
+ const TABLE *const old_table = table;
std::shared_ptr<Rdb_key_def> *const old_key_descr =
m_tbl_def->m_key_descr_arr;
std::shared_ptr<Rdb_key_def> *const new_key_descr =
@@ -9412,9 +10734,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
new_tdef->m_key_descr_arr = nullptr;
delete new_tdef;
- my_printf_error(ER_UNKNOWN_ERROR,
- "MyRocks failed creating new key definitions during alter.",
- MYF(0));
+ my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
DBUG_RETURN(HA_EXIT_FAILURE);
}
@@ -9423,33 +10743,60 @@ bool ha_rocksdb::prepare_inplace_alter_table(
uint i;
uint j;
- const KEY *key;
/* Determine which(if any) key definition(s) need to be dropped */
for (i = 0; i < ha_alter_info->index_drop_count; i++) {
- const KEY *const key = ha_alter_info->index_drop_buffer[i];
+ const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i];
for (j = 0; j < old_n_keys; j++) {
- if (!old_key_descr[j]->m_name.compare(key->name)) {
+ const KEY *const old_key =
+ &old_table->key_info[old_key_descr[j]->get_keyno()];
+
+ if (!compare_keys(old_key, dropped_key)) {
dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id());
break;
}
}
}
- DBUG_ASSERT(dropped_index_ids.size() == ha_alter_info->index_drop_count);
-
/* Determine which(if any) key definitions(s) need to be added */
+ int identical_indexes_found = 0;
for (i = 0; i < ha_alter_info->index_add_count; i++) {
- key = &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
+ const KEY *const added_key =
+ &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
for (j = 0; j < new_n_keys; j++) {
- if (!new_key_descr[j]->m_name.compare(key->name)) {
- added_indexes.insert(new_key_descr[j]);
+ const KEY *const new_key =
+ &altered_table->key_info[new_key_descr[j]->get_keyno()];
+ if (!compare_keys(new_key, added_key)) {
+ /*
+ Check for cases where an 'identical' index is being dropped and
+ re-added in a single ALTER statement. Turn this into a no-op as the
+ index has not changed.
+
+ E.G. Unique index -> non-unique index requires no change
+
+ Note that cases where the index name remains the same but the
+ key-parts are changed is already handled in create_inplace_key_defs.
+ In these cases the index needs to be rebuilt.
+ */
+ if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) {
+ dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id());
+ identical_indexes_found++;
+ } else {
+ added_indexes.insert(new_key_descr[j]);
+ }
+
break;
}
}
}
- DBUG_ASSERT(added_indexes.size() == ha_alter_info->index_add_count);
+ const uint n_dropped_keys =
+ ha_alter_info->index_drop_count - identical_indexes_found;
+ const uint n_added_keys =
+ ha_alter_info->index_add_count - identical_indexes_found;
+ DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys);
+ DBUG_ASSERT(added_indexes.size() == n_added_keys);
+ DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys));
ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx(
new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys,
@@ -9509,17 +10856,13 @@ bool ha_rocksdb::inplace_alter_table(
altered_table, ctx->m_new_tdef,
ha_alter_info->handler_flags &
my_core::Alter_inplace_info::ADD_UNIQUE_INDEX))) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "MyRocks failed allocating key buffers during alter.",
- MYF(0));
+ my_error(ER_OUT_OF_RESOURCES, MYF(0));
DBUG_RETURN(err);
}
/* Populate all new secondary keys by scanning the primary key. */
if ((err = inplace_populate_sk(altered_table, ctx->m_added_indexes))) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "MyRocks failed populating secondary key during alter.",
- MYF(0));
+ my_error(ER_SK_POPULATE_DURING_ALTER, MYF(0));
DBUG_RETURN(HA_EXIT_FAILURE);
}
}
@@ -9539,7 +10882,7 @@ int ha_rocksdb::inplace_populate_sk(
TABLE *const new_table_arg,
const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) {
DBUG_ENTER_FUNC();
-
+ int res = HA_EXIT_SUCCESS;
const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
rocksdb::WriteBatch *const batch = wb.get();
@@ -9549,7 +10892,10 @@ int ha_rocksdb::inplace_populate_sk(
create_index_ids.insert(index->get_gl_index_id());
}
dict_manager.add_create_index(create_index_ids, batch);
- dict_manager.commit(batch);
+ res = dict_manager.commit(batch);
+ if (res != HA_EXIT_SUCCESS) {
+ return res;
+ }
/*
Add uncommitted key definitons to ddl_manager. We need to do this
@@ -9562,7 +10908,6 @@ int ha_rocksdb::inplace_populate_sk(
const bool hidden_pk_exists = has_hidden_pk(table);
- int res = 0;
Rdb_transaction *tx = get_or_create_tx(table->in_use);
/*
@@ -9593,7 +10938,7 @@ int ha_rocksdb::inplace_populate_sk(
open handlers have been closed at this point, and the one we're on is the
only one left.
*/
- if (m_sst_info != nullptr) {
+ if (m_sst_info) {
if ((res = finalize_bulk_load())) {
DBUG_RETURN(res);
}
@@ -9603,14 +10948,16 @@ int ha_rocksdb::inplace_populate_sk(
const ulonglong rdb_merge_buf_size = THDVAR(ha_thd(), merge_buf_size);
const ulonglong rdb_merge_combine_read_size =
THDVAR(ha_thd(), merge_combine_read_size);
+ const ulonglong rdb_merge_tmp_file_removal_delay =
+ THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms);
for (const auto &index : indexes) {
- const rocksdb::Comparator *index_comp = index->get_cf()->GetComparator();
bool is_unique_index =
new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME;
- Rdb_index_merge rdb_merge(thd_rocksdb_tmpdir(), rdb_merge_buf_size,
- rdb_merge_combine_read_size, index_comp);
+ Rdb_index_merge rdb_merge(
+ thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size,
+ rdb_merge_tmp_file_removal_delay, index->get_cf());
if ((res = rdb_merge.init())) {
DBUG_RETURN(res);
@@ -9628,17 +10975,19 @@ int ha_rocksdb::inplace_populate_sk(
for (res = index_first(table->record[0]); res == 0;
res = index_next(table->record[0])) {
longlong hidden_pk_id = 0;
- if (hidden_pk_exists && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) {
+ if (hidden_pk_exists &&
+ (res = read_hidden_pk_id_from_rowkey(&hidden_pk_id))) {
// NO_LINT_DEBUG
sql_print_error("Error retrieving hidden pk id.");
ha_index_end();
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ DBUG_RETURN(res);
}
/* Create new secondary index entry */
const int new_packed_size = index->pack_record(
new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple,
- &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id);
+ &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0,
+ nullptr, nullptr, m_ttl_bytes);
const rocksdb::Slice key = rocksdb::Slice(
reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
@@ -9695,7 +11044,7 @@ int ha_rocksdb::inplace_populate_sk(
print_keydup_error(new_table_arg,
&new_table_arg->key_info[index->get_keyno()],
- MYF(0));
+ MYF(0), ha_thd());
DBUG_RETURN(ER_DUP_ENTRY);
}
}
@@ -9703,7 +11052,7 @@ int ha_rocksdb::inplace_populate_sk(
/*
Insert key and slice to SST via SSTFileWriter API.
*/
- if ((res = bulk_load_key(tx, *index, merge_key, merge_val))) {
+ if ((res = bulk_load_key(tx, *index, merge_key, merge_val, false))) {
break;
}
}
@@ -9959,9 +11308,7 @@ struct rocksdb_status_counters_t {
uint64_t no_file_closes;
uint64_t no_file_opens;
uint64_t no_file_errors;
- uint64_t l0_slowdown_micros;
- uint64_t memtable_compaction_micros;
- uint64_t l0_num_files_stall_micros;
+ uint64_t stall_micros;
uint64_t rate_limit_delay_millis;
uint64_t num_iterators;
uint64_t number_multiget_get;
@@ -10015,9 +11362,7 @@ DEF_SHOW_FUNC(bytes_read, BYTES_READ)
DEF_SHOW_FUNC(no_file_closes, NO_FILE_CLOSES)
DEF_SHOW_FUNC(no_file_opens, NO_FILE_OPENS)
DEF_SHOW_FUNC(no_file_errors, NO_FILE_ERRORS)
-DEF_SHOW_FUNC(l0_slowdown_micros, STALL_L0_SLOWDOWN_MICROS)
-DEF_SHOW_FUNC(memtable_compaction_micros, STALL_MEMTABLE_COMPACTION_MICROS)
-DEF_SHOW_FUNC(l0_num_files_stall_micros, STALL_L0_NUM_FILES_MICROS)
+DEF_SHOW_FUNC(stall_micros, STALL_MICROS)
DEF_SHOW_FUNC(rate_limit_delay_millis, RATE_LIMIT_DELAY_MILLIS)
DEF_SHOW_FUNC(num_iterators, NO_ITERATORS)
DEF_SHOW_FUNC(number_multiget_get, NUMBER_MULTIGET_CALLS)
@@ -10051,11 +11396,31 @@ static void myrocks_update_status() {
export_stats.rows_read = global_stats.rows[ROWS_READ];
export_stats.rows_updated = global_stats.rows[ROWS_UPDATED];
export_stats.rows_deleted_blind = global_stats.rows[ROWS_DELETED_BLIND];
+ export_stats.rows_expired = global_stats.rows[ROWS_EXPIRED];
export_stats.system_rows_deleted = global_stats.system_rows[ROWS_DELETED];
export_stats.system_rows_inserted = global_stats.system_rows[ROWS_INSERTED];
export_stats.system_rows_read = global_stats.system_rows[ROWS_READ];
export_stats.system_rows_updated = global_stats.system_rows[ROWS_UPDATED];
+
+ export_stats.queries_point = global_stats.queries[QUERIES_POINT];
+ export_stats.queries_range = global_stats.queries[QUERIES_RANGE];
+
+ export_stats.covered_secondary_key_lookups =
+ global_stats.covered_secondary_key_lookups;
+}
+
+static void myrocks_update_memory_status() {
+ std::vector<rocksdb::DB *> dbs;
+ std::unordered_set<const rocksdb::Cache *> cache_set;
+ dbs.push_back(rdb);
+ std::map<rocksdb::MemoryUtil::UsageType, uint64_t> temp_usage_by_type;
+ rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set,
+ &temp_usage_by_type);
+ memory_stats.memtable_total =
+ temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal];
+ memory_stats.memtable_unflushed =
+ temp_usage_by_type[rocksdb::MemoryUtil::kMemTableUnFlushed];
}
static SHOW_VAR myrocks_status_variables[] = {
@@ -10066,8 +11431,10 @@ static SHOW_VAR myrocks_status_variables[] = {
DEF_STATUS_VAR_FUNC("rows_read", &export_stats.rows_read, SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("rows_updated", &export_stats.rows_updated,
SHOW_LONGLONG),
- DEF_STATUS_VAR_FUNC("rows_deleted_blind",
- &export_stats.rows_deleted_blind, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_deleted_blind", &export_stats.rows_deleted_blind,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("rows_expired", &export_stats.rows_expired,
+ SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("system_rows_deleted",
&export_stats.system_rows_deleted, SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("system_rows_inserted",
@@ -10076,15 +11443,112 @@ static SHOW_VAR myrocks_status_variables[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_FUNC("system_rows_updated",
&export_stats.system_rows_updated, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_total", &memory_stats.memtable_total,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_unflushed", &memory_stats.memtable_unflushed,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("queries_point", &export_stats.queries_point,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("queries_range", &export_stats.queries_range,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("covered_secondary_key_lookups",
+ &export_stats.covered_secondary_key_lookups,
+ SHOW_LONGLONG),
{NullS, NullS, SHOW_LONG}};
static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) {
myrocks_update_status();
+ myrocks_update_memory_status();
var->type = SHOW_ARRAY;
var->value = reinterpret_cast<char *>(&myrocks_status_variables);
}
+static ulonglong
+io_stall_prop_value(const std::map<std::string, std::string> &props,
+ const std::string &key) {
+ std::map<std::string, std::string>::const_iterator iter =
+ props.find("io_stalls." + key);
+ if (iter != props.end()) {
+ return std::stoull(iter->second);
+ } else {
+ DBUG_PRINT("warning",
+ ("RocksDB GetMapPropery hasn't returned key=%s", key.c_str()));
+ DBUG_ASSERT(0);
+ return 0;
+ }
+}
+
+static void update_rocksdb_stall_status() {
+ st_io_stall_stats local_io_stall_stats;
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
+ continue;
+ }
+
+ std::map<std::string, std::string> props;
+ if (!rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props)) {
+ continue;
+ }
+
+ local_io_stall_stats.level0_slowdown +=
+ io_stall_prop_value(props, "level0_slowdown");
+ local_io_stall_stats.level0_slowdown_with_compaction +=
+ io_stall_prop_value(props, "level0_slowdown_with_compaction");
+ local_io_stall_stats.level0_numfiles +=
+ io_stall_prop_value(props, "level0_numfiles");
+ local_io_stall_stats.level0_numfiles_with_compaction +=
+ io_stall_prop_value(props, "level0_numfiles_with_compaction");
+ local_io_stall_stats.stop_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "stop_for_pending_compaction_bytes");
+ local_io_stall_stats.slowdown_for_pending_compaction_bytes +=
+ io_stall_prop_value(props, "slowdown_for_pending_compaction_bytes");
+ local_io_stall_stats.memtable_compaction +=
+ io_stall_prop_value(props, "memtable_compaction");
+ local_io_stall_stats.memtable_slowdown +=
+ io_stall_prop_value(props, "memtable_slowdown");
+ local_io_stall_stats.total_stop += io_stall_prop_value(props, "total_stop");
+ local_io_stall_stats.total_slowdown +=
+ io_stall_prop_value(props, "total_slowdown");
+ }
+ io_stall_stats = local_io_stall_stats;
+}
+
+static SHOW_VAR rocksdb_stall_status_variables[] = {
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_slowdowns",
+ &io_stall_stats.level0_slowdown_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_stops",
+ &io_stall_stats.level0_numfiles_with_compaction,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_stops",
+ &io_stall_stats.stop_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("pending_compaction_limit_slowdowns",
+ &io_stall_stats.slowdown_for_pending_compaction_bytes,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_stops",
+ &io_stall_stats.memtable_compaction, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("memtable_limit_slowdowns",
+ &io_stall_stats.memtable_slowdown, SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_stops", &io_stall_stats.total_stop,
+ SHOW_LONGLONG),
+ DEF_STATUS_VAR_FUNC("total_slowdowns", &io_stall_stats.total_slowdown,
+ SHOW_LONGLONG),
+ // end of the array marker
+ {NullS, NullS, SHOW_LONG}};
+
+static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) {
+ update_rocksdb_stall_status();
+ var->type = SHOW_ARRAY;
+ var->value = reinterpret_cast<char *>(&rocksdb_stall_status_variables);
+}
+
static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(block_cache_miss),
DEF_STATUS_VAR(block_cache_hit),
@@ -10109,9 +11573,7 @@ static SHOW_VAR rocksdb_status_vars[] = {
DEF_STATUS_VAR(no_file_closes),
DEF_STATUS_VAR(no_file_opens),
DEF_STATUS_VAR(no_file_errors),
- DEF_STATUS_VAR(l0_slowdown_micros),
- DEF_STATUS_VAR(memtable_compaction_micros),
- DEF_STATUS_VAR(l0_num_files_stall_micros),
+ DEF_STATUS_VAR(stall_micros),
DEF_STATUS_VAR(rate_limit_delay_millis),
DEF_STATUS_VAR(num_iterators),
DEF_STATUS_VAR(number_multiget_get),
@@ -10154,7 +11616,12 @@ static SHOW_VAR rocksdb_status_vars[] = {
SHOW_LONGLONG),
DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other,
SHOW_LONGLONG),
+ // the variables generated by SHOW_FUNC are sorted only by prefix (first
+ // arg in the tuple below), so make sure it is unique to make sorting
+ // deterministic as quick sort is not stable
{"rocksdb", reinterpret_cast<char *>(&show_myrocks_vars), SHOW_FUNC},
+ {"rocksdb_stall", reinterpret_cast<char *>(&show_rocksdb_stall_vars),
+ SHOW_FUNC},
{NullS, NullS, SHOW_LONG}};
/*
@@ -10201,10 +11668,13 @@ void Rdb_background_thread::run() {
// pthread_cond_timedwait()) to wait on.
set_timespec(ts_next_sync, WAKE_UP_INTERVAL);
- // Flush the WAL.
- if (rdb && rocksdb_background_sync) {
- DBUG_ASSERT(!rocksdb_db_options.allow_mmap_writes);
- const rocksdb::Status s = rdb->SyncWAL();
+ // Flush the WAL. Sync it for both background and never modes to copy
+ // InnoDB's behavior. For mode never, the wal file isn't even written,
+ // whereas background writes to the wal file, but issues the syncs in a
+ // background thread.
+ if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) {
+ DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes);
+ const rocksdb::Status s = rdb->FlushWAL(true);
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
}
@@ -10235,7 +11705,7 @@ void Rdb_background_thread::run() {
*/
bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd,
const rocksdb::Slice &eq_cond,
- const bool use_all_keys, bool is_ascending) {
+ const bool use_all_keys) {
bool can_use = false;
if (THDVAR(thd, skip_bloom_filter_on_read)) {
@@ -10286,8 +11756,36 @@ rocksdb::TransactionDB *rdb_get_rocksdb_db() { return rdb; }
Rdb_cf_manager &rdb_get_cf_manager() { return cf_manager; }
-rocksdb::BlockBasedTableOptions &rdb_get_table_options() {
- return rocksdb_tbl_options;
+const rocksdb::BlockBasedTableOptions &rdb_get_table_options() {
+ return *rocksdb_tbl_options;
+}
+
+bool rdb_is_ttl_enabled() { return rocksdb_enable_ttl; }
+bool rdb_is_ttl_read_filtering_enabled() {
+ return rocksdb_enable_ttl_read_filtering;
+}
+#ifndef NDEBUG
+int rdb_dbug_set_ttl_rec_ts() { return rocksdb_debug_ttl_rec_ts; }
+int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; }
+int rdb_dbug_set_ttl_read_filter_ts() {
+ return rocksdb_debug_ttl_read_filter_ts;
+}
+bool rdb_dbug_set_ttl_ignore_pk() { return rocksdb_debug_ttl_ignore_pk; }
+#endif
+
+void rdb_update_global_stats(const operation_type &type, uint count,
+ bool is_system_table) {
+ DBUG_ASSERT(type < ROWS_MAX);
+
+ if (count == 0) {
+ return;
+ }
+
+ if (is_system_table) {
+ global_stats.system_rows[type].add(count);
+ } else {
+ global_stats.rows[type].add(count);
+ }
}
int rdb_get_table_perf_counters(const char *const tablename,
@@ -10298,7 +11796,7 @@ int rdb_get_table_perf_counters(const char *const tablename,
Rdb_table_handler *table_handler;
table_handler = rdb_open_tables.get_table_handler(tablename);
if (table_handler == nullptr) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_INVALID_TABLE;
}
counters->load(table_handler->m_table_perf_context);
@@ -10342,30 +11840,18 @@ void rdb_handle_io_error(const rocksdb::Status status,
switch (err_type) {
case RDB_IO_ERROR_TX_COMMIT:
case RDB_IO_ERROR_DICT_COMMIT: {
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: failed to write to WAL. Error type = %s, "
- "status code = %d, status = %s",
- get_rdb_io_error_string(err_type), status.code(),
- status.ToString().c_str());
+ rdb_log_status_error(status, "failed to write to WAL");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on WAL write error.");
abort_with_stack_traces();
break;
}
case RDB_IO_ERROR_BG_THREAD: {
- /* NO_LINT_DEBUG */
- sql_print_warning("MyRocks: BG thread failed to write to RocksDB. "
- "Error type = %s, status code = %d, status = %s",
- get_rdb_io_error_string(err_type), status.code(),
- status.ToString().c_str());
+ rdb_log_status_error(status, "BG thread failed to write to RocksDB");
break;
}
case RDB_IO_ERROR_GENERAL: {
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: failed on I/O. Error type = %s, "
- "status code = %d, status = %s",
- get_rdb_io_error_string(err_type), status.code(),
- status.ToString().c_str());
+ rdb_log_status_error(status, "failed on I/O");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on I/O error.");
abort_with_stack_traces();
@@ -10376,33 +11862,21 @@ void rdb_handle_io_error(const rocksdb::Status status,
break;
}
} else if (status.IsCorruption()) {
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: data corruption detected! Error type = %s, "
- "status code = %d, status = %s",
- get_rdb_io_error_string(err_type), status.code(),
- status.ToString().c_str());
+ rdb_log_status_error(status, "data corruption detected!");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting because of data corruption.");
abort_with_stack_traces();
} else if (!status.ok()) {
switch (err_type) {
case RDB_IO_ERROR_DICT_COMMIT: {
- /* NO_LINT_DEBUG */
- sql_print_error("MyRocks: failed to write to WAL (dictionary). "
- "Error type = %s, status code = %d, status = %s",
- get_rdb_io_error_string(err_type), status.code(),
- status.ToString().c_str());
+ rdb_log_status_error(status, "Failed to write to WAL (dictionary)");
/* NO_LINT_DEBUG */
sql_print_error("MyRocks: aborting on WAL write error.");
abort_with_stack_traces();
break;
}
default:
- /* NO_LINT_DEBUG */
- sql_print_warning("MyRocks: failed to read/write in RocksDB. "
- "Error type = %s, status code = %d, status = %s",
- get_rdb_io_error_string(err_type), status.code(),
- status.ToString().c_str());
+ rdb_log_status_error(status, "Failed to read/write in RocksDB");
break;
}
}
@@ -10484,13 +11958,52 @@ void rocksdb_set_rate_limiter_bytes_per_sec(
}
}
+void rocksdb_set_sst_mgr_rate_bytes_per_sec(
+ my_core::THD *const thd,
+ my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ const uint64_t new_val = *static_cast<const uint64_t *>(save);
+
+ if (new_val != rocksdb_sst_mgr_rate_bytes_per_sec) {
+ rocksdb_sst_mgr_rate_bytes_per_sec = new_val;
+
+ rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond(
+ rocksdb_sst_mgr_rate_bytes_per_sec);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var,
void *var_ptr, const void *save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
const uint64_t new_val = *static_cast<const uint64_t *>(save);
if (rocksdb_delayed_write_rate != new_val) {
rocksdb_delayed_write_rate = new_val;
- rocksdb_db_options.delayed_write_rate = new_val;
+ rocksdb::Status s =
+ rdb->SetDBOptions({{"delayed_write_rate", std::to_string(new_val)}});
+
+ if (!s.ok()) {
+ /* NO_LINT_DEBUG */
+ sql_print_warning("MyRocks: failed to update delayed_write_rate. "
+ "status code = %d, status = %s",
+ s.code(), s.ToString().c_str());
+ }
}
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save) {
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+ const uint32_t new_val = *static_cast<const uint32_t *>(save);
+ if (rocksdb_max_latest_deadlocks != new_val) {
+ rocksdb_max_latest_deadlocks = new_val;
+ rdb->SetDeadlockInfoBufferSize(rocksdb_max_latest_deadlocks);
+ }
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
void rdb_set_collation_exception_list(const char *const exception_list) {
@@ -10535,18 +12048,144 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
*static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
}
-static void rocksdb_set_max_background_compactions(
- THD *thd, struct st_mysql_sys_var *const var, void *const var_ptr,
- const void *const save) {
+void rocksdb_set_bulk_load_allow_unsorted(
+ THD *const thd,
+ struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+ void *const var_ptr, const void *const save) {
+ if (THDVAR(thd, bulk_load)) {
+ my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET",
+ "Cannot change this setting while bulk load is enabled");
+ } else {
+ *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+ }
+}
+
+static void rocksdb_set_max_background_jobs(THD *thd,
+ struct st_mysql_sys_var *const var,
+ void *const var_ptr,
+ const void *const save) {
DBUG_ASSERT(save != nullptr);
+ DBUG_ASSERT(rocksdb_db_options != nullptr);
+ DBUG_ASSERT(rocksdb_db_options->env != nullptr);
RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
- rocksdb_db_options.max_background_compactions =
- *static_cast<const int *>(save);
- rocksdb_db_options.env->SetBackgroundThreads(
- rocksdb_db_options.max_background_compactions,
- rocksdb::Env::Priority::LOW);
+ const int new_val = *static_cast<const int *>(save);
+
+ if (rocksdb_db_options->max_background_jobs != new_val) {
+ rocksdb_db_options->max_background_jobs = new_val;
+ rocksdb::Status s =
+ rdb->SetDBOptions({{"max_background_jobs", std::to_string(new_val)}});
+
+ if (!s.ok()) {
+ /* NO_LINT_DEBUG */
+ sql_print_warning("MyRocks: failed to update max_background_jobs. "
+ "Status code = %d, status = %s.",
+ s.code(), s.ToString().c_str());
+ }
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
+void rocksdb_set_update_cf_options(THD *const /* unused */,
+ struct st_mysql_sys_var *const /* unused */,
+ void *const var_ptr,
+ const void *const save) {
+ const char *const val = *static_cast<const char *const *>(save);
+
+ if (!val) {
+ // NO_LINT_DEBUG
+ sql_print_warning("MyRocks: NULL is not a valid option for updates to "
+ "column family settings.");
+ return;
+ }
+
+ RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+ DBUG_ASSERT(val != nullptr);
+
+ // Do the real work of applying the changes.
+ Rdb_cf_options::Name_to_config_t option_map;
+
+ // Basic sanity checking and parsing the options into a map. If this fails
+ // then there's no point to proceed.
+ if (!Rdb_cf_options::parse_cf_options(val, &option_map)) {
+ *reinterpret_cast<char**>(var_ptr) = nullptr;
+
+ // NO_LINT_DEBUG
+ sql_print_warning("MyRocks: failed to parse the updated column family "
+ "options = '%s'.", val);
+ RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+ return;
+ }
+
+ // For each CF we have, see if we need to update any settings.
+ for (const auto &cf_name : cf_manager.get_cf_names()) {
+ DBUG_ASSERT(!cf_name.empty());
+
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ DBUG_ASSERT(cfh != nullptr);
+
+ const auto it = option_map.find(cf_name);
+ std::string per_cf_options = (it != option_map.end()) ? it->second : "";
+
+ if (!per_cf_options.empty()) {
+ Rdb_cf_options::Name_to_config_t opt_map;
+ rocksdb::Status s = rocksdb::StringToMap(per_cf_options, &opt_map);
+
+ if (s != rocksdb::Status::OK()) {
+ // NO_LINT_DEBUG
+ sql_print_warning("MyRocks: failed to convert the options for column "
+ "family '%s' to a map. %s", cf_name.c_str(),
+ s.ToString().c_str());
+ } else {
+ DBUG_ASSERT(rdb != nullptr);
+
+ // Finally we can apply the options.
+ s = rdb->SetOptions(cfh, opt_map);
+
+ if (s != rocksdb::Status::OK()) {
+ // NO_LINT_DEBUG
+ sql_print_warning("MyRocks: failed to apply the options for column "
+ "family '%s'. %s", cf_name.c_str(),
+ s.ToString().c_str());
+ } else {
+ // NO_LINT_DEBUG
+ sql_print_information("MyRocks: options for column family '%s' "
+ "have been successfully updated.",
+ cf_name.c_str());
+
+ // Make sure that data is internally consistent as well and update
+ // the CF options. This is necessary also to make sure that the CF
+ // options will be correctly reflected in the relevant table:
+ // ROCKSDB_CF_OPTIONS in INFORMATION_SCHEMA.
+ rocksdb::ColumnFamilyOptions cf_options = rdb->GetOptions(cfh);
+ std::string updated_options;
+
+ s = rocksdb::GetStringFromColumnFamilyOptions(&updated_options,
+ cf_options);
+
+ DBUG_ASSERT(s == rocksdb::Status::OK());
+ DBUG_ASSERT(!updated_options.empty());
+
+ cf_manager.update_options_map(cf_name, updated_options);
+ }
+ }
+ }
+ }
+
+ // Reset the pointers regardless of how much success we had with updating
+ // the CF options. This will result in consistent behavior and avoids
+ // dealing with cases when only a subset of CF-s was successfully updated.
+ if (val) {
+ *reinterpret_cast<char**>(var_ptr) = my_strdup(val, MYF(0));
+ } else {
+ *reinterpret_cast<char**>(var_ptr) = nullptr;
+ }
+
+ // Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to
+ // free up resources used before.
RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
}
@@ -10615,6 +12254,19 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
} // namespace myrocks
+/**
+ Construct and emit duplicate key error message using information
+ from table's record buffer.
+
+ @sa print_keydup_error(table, key, msg, errflag, thd, org_table_name).
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, myf errflag,
+ const THD *thd, const char *org_table_name)
+{
+ print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
+}
+
/*
Register the storage engine plugin outside of myrocks namespace
so that mysql_declare_plugin does not get confused when it does
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 5852ad4e954..d064988d0f3 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -22,6 +22,7 @@
/* C++ standard header files */
#include <set>
#include <string>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
@@ -41,6 +42,7 @@
/* RocksDB header files */
#include "rocksdb/cache.h"
#include "rocksdb/perf_context.h"
+#include "rocksdb/sst_file_manager.h"
#include "rocksdb/statistics.h"
#include "rocksdb/utilities/options_util.h"
#include "rocksdb/utilities/transaction_db.h"
@@ -49,6 +51,7 @@
/* MyRocks header files */
#include "./rdb_comparator.h"
#include "./rdb_index_merge.h"
+#include "./rdb_io_watchdog.h"
#include "./rdb_perf_context.h"
#include "./rdb_sst_info.h"
#include "./rdb_utils.h"
@@ -96,12 +99,12 @@ std::vector<Rdb_trx_info> rdb_get_all_trx_info();
- the name used to set the default column family parameter for per-cf
arguments.
*/
-const char *const DEFAULT_CF_NAME = "default";
+extern const std::string DEFAULT_CF_NAME;
/*
This is the name of the Column Family used for storing the data dictionary.
*/
-const char *const DEFAULT_SYSTEM_CF_NAME = "__system__";
+extern const std::string DEFAULT_SYSTEM_CF_NAME;
/*
This is the name of the hidden primary key for tables with no pk.
@@ -110,9 +113,9 @@ const char *const HIDDEN_PK_NAME = "HIDDEN_PK_ID";
/*
Column family name which means "put this index into its own column family".
- See Rdb_cf_manager::get_per_index_cf_name().
+ DEPRECATED!!!
*/
-const char *const PER_INDEX_CF_NAME = "$per_index_cf";
+extern const std::string PER_INDEX_CF_NAME;
/*
Name for the background thread.
@@ -138,7 +141,7 @@ const char RDB_PER_PARTITION_QUALIFIER_NAME_SEP = '_';
- p0_cfname=foo
- p3_tts_col=bar
*/
-const char RDB_PER_PARTITION_QUALIFIER_VALUE_SEP = '=';
+const char RDB_QUALIFIER_VALUE_SEP = '=';
/*
Separator between multiple qualifier assignments. Sample usage:
@@ -153,6 +156,16 @@ const char RDB_QUALIFIER_SEP = ';';
const char *const RDB_CF_NAME_QUALIFIER = "cfname";
/*
+ Qualifier name for a custom per partition ttl duration.
+*/
+const char *const RDB_TTL_DURATION_QUALIFIER = "ttl_duration";
+
+/*
+ Qualifier name for a custom per partition ttl duration.
+*/
+const char *const RDB_TTL_COL_QUALIFIER = "ttl_col";
+
+/*
Default, minimal valid, and maximum valid sampling rate values when collecting
statistics about table.
*/
@@ -180,13 +193,17 @@ const char *const RDB_CF_NAME_QUALIFIER = "cfname";
CPU-s and derive the values from there. This however has its own set of
problems and we'll choose simplicity for now.
*/
-#define MAX_BACKGROUND_COMPACTIONS 64
-#define MAX_BACKGROUND_FLUSHES 64
+#define MAX_BACKGROUND_JOBS 64
#define DEFAULT_SUBCOMPACTIONS 1
#define MAX_SUBCOMPACTIONS 64
/*
+ Default value for rocksdb_sst_mgr_rate_bytes_per_sec = 0 (disabled).
+*/
+#define DEFAULT_SST_MGR_RATE_BYTES_PER_SEC 0
+
+/*
Defines the field sizes for serializing XID object to a string representation.
string byte format: [field_size: field_value, ...]
[
@@ -228,19 +245,56 @@ enum collations_used {
#define ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN sizeof(longlong)
/*
- MyRocks specific error codes. NB! Please make sure that you will update
- HA_ERR_ROCKSDB_LAST when adding new ones.
+ Bytes used to store TTL, in the beginning of all records for tables with TTL
+ enabled.
*/
-#define HA_ERR_ROCKSDB_UNIQUE_NOT_SUPPORTED (HA_ERR_LAST + 1)
-#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_LAST + 2)
-#define HA_ERR_ROCKSDB_TOO_MANY_LOCKS (HA_ERR_LAST + 3)
-#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED (HA_ERR_LAST + 4)
-#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED (HA_ERR_LAST + 5)
-#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED
+#define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
-inline bool looks_like_per_index_cf_typo(const char *const name) {
- return (name && name[0] == '$' && strcmp(name, PER_INDEX_CF_NAME));
-}
+/*
+ Maximum index prefix length in bytes.
+*/
+#define MAX_INDEX_COL_LEN_LARGE 3072
+#define MAX_INDEX_COL_LEN_SMALL 767
+
+/*
+ MyRocks specific error codes. NB! Please make sure that you will update
+ HA_ERR_ROCKSDB_LAST when adding new ones. Also update the strings in
+ rdb_error_messages to include any new error messages.
+*/
+#define HA_ERR_ROCKSDB_FIRST (HA_ERR_LAST + 1)
+#define HA_ERR_ROCKSDB_PK_REQUIRED (HA_ERR_ROCKSDB_FIRST + 0)
+#define HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED \
+ (HA_ERR_ROCKSDB_FIRST + 1)
+#define HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED \
+ (HA_ERR_ROCKSDB_FIRST + 2)
+#define HA_ERR_ROCKSDB_COMMIT_FAILED (HA_ERR_ROCKSDB_FIRST + 3)
+#define HA_ERR_ROCKSDB_BULK_LOAD (HA_ERR_ROCKSDB_FIRST + 4)
+#define HA_ERR_ROCKSDB_CORRUPT_DATA (HA_ERR_ROCKSDB_FIRST + 5)
+#define HA_ERR_ROCKSDB_CHECKSUM_MISMATCH (HA_ERR_ROCKSDB_FIRST + 6)
+#define HA_ERR_ROCKSDB_INVALID_TABLE (HA_ERR_ROCKSDB_FIRST + 7)
+#define HA_ERR_ROCKSDB_PROPERTIES (HA_ERR_ROCKSDB_FIRST + 8)
+#define HA_ERR_ROCKSDB_MERGE_FILE_ERR (HA_ERR_ROCKSDB_FIRST + 9)
+/*
+ Each error code below maps to a RocksDB status code found in:
+ rocksdb/include/rocksdb/status.h
+*/
+#define HA_ERR_ROCKSDB_STATUS_NOT_FOUND (HA_ERR_LAST + 10)
+#define HA_ERR_ROCKSDB_STATUS_CORRUPTION (HA_ERR_LAST + 11)
+#define HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED (HA_ERR_LAST + 12)
+#define HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT (HA_ERR_LAST + 13)
+#define HA_ERR_ROCKSDB_STATUS_IO_ERROR (HA_ERR_LAST + 14)
+#define HA_ERR_ROCKSDB_STATUS_NO_SPACE (HA_ERR_LAST + 15)
+#define HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS (HA_ERR_LAST + 16)
+#define HA_ERR_ROCKSDB_STATUS_INCOMPLETE (HA_ERR_LAST + 17)
+#define HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS (HA_ERR_LAST + 18)
+#define HA_ERR_ROCKSDB_STATUS_TIMED_OUT (HA_ERR_LAST + 19)
+#define HA_ERR_ROCKSDB_STATUS_ABORTED (HA_ERR_LAST + 20)
+#define HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT (HA_ERR_LAST + 21)
+#define HA_ERR_ROCKSDB_STATUS_BUSY (HA_ERR_LAST + 22)
+#define HA_ERR_ROCKSDB_STATUS_DEADLOCK (HA_ERR_LAST + 23)
+#define HA_ERR_ROCKSDB_STATUS_EXPIRED (HA_ERR_LAST + 24)
+#define HA_ERR_ROCKSDB_STATUS_TRY_AGAIN (HA_ERR_LAST + 25)
+#define HA_ERR_ROCKSDB_LAST HA_ERR_ROCKSDB_STATUS_TRY_AGAIN
/**
@brief
@@ -253,12 +307,21 @@ struct Rdb_table_handler {
char *m_table_name;
uint m_table_name_length;
int m_ref_count;
+ atomic_stat<int> m_lock_wait_timeout_counter;
+ atomic_stat<int> m_deadlock_counter;
my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock
/* Stores cumulative table statistics */
my_io_perf_atomic_t m_io_perf_read;
+ my_io_perf_atomic_t m_io_perf_write;
Rdb_atomic_perf_counters m_table_perf_context;
+
+ /* Stores cached memtable estimate statistics */
+ std::atomic_uint m_mtcache_lock;
+ uint64_t m_mtcache_count;
+ uint64_t m_mtcache_size;
+ uint64_t m_mtcache_last_update;
};
class Rdb_key_def;
@@ -297,15 +360,19 @@ typedef struct _gl_index_id_s {
}
} GL_INDEX_ID;
-enum operation_type {
+enum operation_type : int {
ROWS_DELETED = 0,
ROWS_INSERTED,
ROWS_READ,
ROWS_UPDATED,
ROWS_DELETED_BLIND,
+ ROWS_EXPIRED,
+ ROWS_HIDDEN_NO_SNAPSHOT,
ROWS_MAX
};
+enum query_type : int { QUERIES_POINT = 0, QUERIES_RANGE, QUERIES_MAX };
+
#if defined(HAVE_SCHED_GETCPU)
#define RDB_INDEXER get_sched_indexer_t
#else
@@ -319,6 +386,10 @@ struct st_global_stats {
// system_rows_ stats are only for system
// tables. They are not counted in rows_* stats.
ib_counter_t<ulonglong, 64, RDB_INDEXER> system_rows[ROWS_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> queries[QUERIES_MAX];
+
+ ib_counter_t<ulonglong, 64, RDB_INDEXER> covered_secondary_key_lookups;
};
/* Struct used for exporting status to MySQL */
@@ -328,11 +399,45 @@ struct st_export_stats {
ulonglong rows_read;
ulonglong rows_updated;
ulonglong rows_deleted_blind;
+ ulonglong rows_expired;
+ ulonglong rows_hidden_no_snapshot;
ulonglong system_rows_deleted;
ulonglong system_rows_inserted;
ulonglong system_rows_read;
ulonglong system_rows_updated;
+
+ ulonglong queries_point;
+ ulonglong queries_range;
+
+ ulonglong covered_secondary_key_lookups;
+};
+
+/* Struct used for exporting RocksDB memory status */
+struct st_memory_stats {
+ ulonglong memtable_total;
+ ulonglong memtable_unflushed;
+};
+
+/* Struct used for exporting RocksDB IO stalls stats */
+struct st_io_stall_stats {
+ ulonglong level0_slowdown;
+ ulonglong level0_slowdown_with_compaction;
+ ulonglong level0_numfiles;
+ ulonglong level0_numfiles_with_compaction;
+ ulonglong stop_for_pending_compaction_bytes;
+ ulonglong slowdown_for_pending_compaction_bytes;
+ ulonglong memtable_compaction;
+ ulonglong memtable_slowdown;
+ ulonglong total_stop;
+ ulonglong total_slowdown;
+
+ st_io_stall_stats()
+ : level0_slowdown(0), level0_slowdown_with_compaction(0),
+ level0_numfiles(0), level0_numfiles_with_compaction(0),
+ stop_for_pending_compaction_bytes(0),
+ slowdown_for_pending_compaction_bytes(0), memtable_compaction(0),
+ memtable_slowdown(0), total_stop(0), total_slowdown(0) {}
};
} // namespace myrocks
@@ -439,6 +544,17 @@ class ha_rocksdb : public my_core::handler {
*/
uchar *m_pack_buffer;
+ /*
+ Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
+ */
+ char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD];
+ /*
+ The TTL timestamp value can change if the explicit TTL column is
+ updated. If we detect this when updating the PK, we indicate it here so
+ we know we must always update any SK's.
+ */
+ bool m_ttl_bytes_updated;
+
/* rowkey of the last record we've read, in StorageFormat. */
String m_last_rowkey;
@@ -484,7 +600,9 @@ class ha_rocksdb : public my_core::handler {
bool m_update_scope_is_valid;
/* SST information used for bulk loading the primary key */
- std::shared_ptr<Rdb_sst_info> m_sst_info;
+ std::unique_ptr<Rdb_sst_info> m_sst_info;
+ /* External merge sorts for bulk load: key ID -> merge sort instance */
+ std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
Rdb_transaction *m_bulk_load_tx;
/* Mutex to protect finalizing bulk load */
mysql_mutex_t m_bulk_load_mutex;
@@ -504,14 +622,14 @@ class ha_rocksdb : public my_core::handler {
void setup_iterator_for_rnd_scan();
void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *const slice)
MY_ATTRIBUTE((__nonnull__)) {
- setup_scan_iterator(kd, slice, false, false, 0);
+ setup_scan_iterator(kd, slice, false, 0);
}
bool is_ascending(const Rdb_key_def &keydef,
enum ha_rkey_function find_flag) const
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice,
- const bool use_all_keys, const bool is_ascending,
- const uint eq_cond_len) MY_ATTRIBUTE((__nonnull__));
+ const bool use_all_keys, const uint eq_cond_len)
+ MY_ATTRIBUTE((__nonnull__));
void release_scan_iterator(void);
rocksdb::Status
@@ -520,13 +638,15 @@ class ha_rocksdb : public my_core::handler {
const rocksdb::Slice &key, std::string *const value) const;
int get_row_by_rowid(uchar *const buf, const char *const rowid,
- const uint rowid_size, const bool skip_lookup = false)
+ const uint rowid_size, const bool skip_lookup = false,
+ const bool skip_ttl_check = true)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int get_row_by_rowid(uchar *const buf, const uchar *const rowid,
- const uint rowid_size, const bool skip_lookup = false)
+ const uint rowid_size, const bool skip_lookup = false,
+ const bool skip_ttl_check = true)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)) {
return get_row_by_rowid(buf, reinterpret_cast<const char *>(rowid),
- rowid_size, skip_lookup);
+ rowid_size, skip_lookup, skip_ttl_check);
}
void update_auto_incr_val();
@@ -580,6 +700,13 @@ class ha_rocksdb : public my_core::handler {
void setup_read_decoders();
/*
+ For the active index, indicates which columns must be covered for the
+ current lookup to be covered. If the bitmap field is null, that means this
+ index does not cover the current lookup for any record.
+ */
+ MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0};
+
+ /*
Number of bytes in on-disk (storage) record format that are used for
storing SQL NULL flags.
*/
@@ -759,14 +886,6 @@ public:
uchar *const buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- void convert_record_to_storage_format(const rocksdb::Slice &pk_packed_slice,
- Rdb_string_writer *const pk_unpack_info,
- rocksdb::Slice *const packed_rec)
- MY_ATTRIBUTE((__nonnull__));
-
- static const std::string gen_cf_name_qualifier_for_partition(
- const std::string &s);
-
static const std::vector<std::string> parse_into_tokens(const std::string &s,
const char delim);
@@ -785,6 +904,9 @@ public:
const Rdb_tbl_def *const tbl_def_arg)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ static const std::string get_table_comment(const TABLE *const table_arg)
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
static bool is_hidden_pk(const uint index, const TABLE *const table_arg,
const Rdb_tbl_def *const tbl_def_arg)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -821,11 +943,7 @@ public:
DBUG_RETURN(MAX_REF_PARTS);
}
- uint max_supported_key_part_length() const override {
- DBUG_ENTER_FUNC();
-
- DBUG_RETURN(2048);
- }
+ uint max_supported_key_part_length() const override;
/** @brief
unireg.cc will call this to make sure that the storage engine can handle
@@ -916,7 +1034,6 @@ private:
struct key_def_cf_info {
rocksdb::ColumnFamilyHandle *cf_handle;
bool is_reverse_cf;
- bool is_auto_cf;
bool is_per_partition_cf;
};
@@ -926,12 +1043,23 @@ private:
const uchar *old_data;
rocksdb::Slice new_pk_slice;
rocksdb::Slice old_pk_slice;
+ rocksdb::Slice old_pk_rec;
// "unpack_info" data for the new PK value
Rdb_string_writer *new_pk_unpack_info;
longlong hidden_pk_id;
bool skip_unique_check;
+
+ // In certain cases, TTL is enabled on a table, as well as an explicit TTL
+ // column. The TTL column can be part of either the key or the value part
+ // of the record. If it is part of the key, we store the offset here.
+ //
+ // Later on, we use this offset to store the TTL in the value part of the
+ // record, which we can then access in the compaction filter.
+ //
+ // Set to UINT_MAX by default to indicate that the TTL is not in key.
+ uint ttl_pk_offset = UINT_MAX;
};
/*
@@ -987,6 +1115,21 @@ private:
const KEY *const new_key) const;
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ int compare_keys(const KEY *const old_key, const KEY *const new_key) const
+ MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+
+ int convert_record_to_storage_format(const struct update_row_info &row_info,
+ rocksdb::Slice *const packed_rec)
+ MY_ATTRIBUTE((__nonnull__));
+
+ bool should_hide_ttl_rec(const Rdb_key_def &kd,
+ const rocksdb::Slice &ttl_rec_val,
+ const int64_t curr_ts)
+ MY_ATTRIBUTE((__warn_unused_result__));
+ void rocksdb_skip_expired_records(const Rdb_key_def &kd,
+ rocksdb::Iterator *const iter,
+ bool seek_backward);
+
int index_first_intern(uchar *buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int index_last_intern(uchar *buf)
@@ -1006,7 +1149,7 @@ private:
MY_ATTRIBUTE((__warn_unused_result__));
int check_and_lock_sk(const uint &key_id,
const struct update_row_info &row_info,
- bool *const found) const
+ bool *const found)
MY_ATTRIBUTE((__warn_unused_result__));
int check_uniqueness_and_lock(const struct update_row_info &row_info,
bool *const pk_changed)
@@ -1018,8 +1161,10 @@ private:
struct unique_sk_buf_info *sk_info)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
- const rocksdb::Slice &key, const rocksdb::Slice &value)
+ const rocksdb::Slice &key, const rocksdb::Slice &value,
+ bool sort)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+ void update_bytes_written(ulonglong bytes_written);
int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info,
const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__));
int update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
@@ -1031,18 +1176,22 @@ private:
int read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter,
const bool &using_full_key,
- const rocksdb::Slice &key_slice) const
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int read_before_key(const Rdb_key_def &kd, const bool &using_full_key,
- const rocksdb::Slice &key_slice)
+ const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
- int read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice)
+ int read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice,
+ const int64_t ttl_filter_ts)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
int position_to_correct_key(
const Rdb_key_def &kd, const enum ha_rkey_function &find_flag,
const bool &full_key_match, const uchar *const key,
const key_part_map &keypart_map, const rocksdb::Slice &key_slice,
- bool *const move_forward) MY_ATTRIBUTE((__warn_unused_result__));
+ bool *const move_forward, const int64_t ttl_filter_ts)
+ MY_ATTRIBUTE((__warn_unused_result__));
int read_row_from_primary_key(uchar *const buf)
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -1122,7 +1271,7 @@ public:
int check(THD *const thd, HA_CHECK_OPT *const check_opt) override
MY_ATTRIBUTE((__warn_unused_result__));
- void remove_rows(Rdb_tbl_def *const tbl);
+ int remove_rows(Rdb_tbl_def *const tbl);
ha_rows records_in_range(uint inx, key_range *const min_key,
key_range *const max_key) override
MY_ATTRIBUTE((__warn_unused_result__));
@@ -1152,6 +1301,10 @@ public:
bool get_error_message(const int error, String *const buf) override
MY_ATTRIBUTE((__nonnull__));
+ static int rdb_error_to_mysql(const rocksdb::Status &s,
+ const char *msg = nullptr)
+ MY_ATTRIBUTE((__warn_unused_result__));
+
void get_auto_increment(ulonglong offset, ulonglong increment,
ulonglong nb_desired_values,
ulonglong *const first_value,
@@ -1258,5 +1411,4 @@ private:
Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &);
Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &);
};
-
} // namespace myrocks
diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h
index a4f1b2039b5..85c3968cc99 100644
--- a/storage/rocksdb/ha_rocksdb_proto.h
+++ b/storage/rocksdb/ha_rocksdb_proto.h
@@ -70,7 +70,19 @@ rocksdb::TransactionDB *rdb_get_rocksdb_db();
class Rdb_cf_manager;
Rdb_cf_manager &rdb_get_cf_manager();
-rocksdb::BlockBasedTableOptions &rdb_get_table_options();
+const rocksdb::BlockBasedTableOptions &rdb_get_table_options();
+bool rdb_is_ttl_enabled();
+bool rdb_is_ttl_read_filtering_enabled();
+#ifndef NDEBUG
+int rdb_dbug_set_ttl_rec_ts();
+int rdb_dbug_set_ttl_snapshot_ts();
+int rdb_dbug_set_ttl_read_filter_ts();
+bool rdb_dbug_set_ttl_ignore_pk();
+#endif
+
+enum operation_type : int;
+void rdb_update_global_stats(const operation_type &type, uint count,
+ bool is_system_table = false);
class Rdb_dict_manager;
Rdb_dict_manager *rdb_get_dict_manager(void)
@@ -83,5 +95,4 @@ Rdb_ddl_manager *rdb_get_ddl_manager(void)
class Rdb_binlog_manager;
Rdb_binlog_manager *rdb_get_binlog_manager(void)
MY_ATTRIBUTE((__warn_unused_result__));
-
} // namespace myrocks
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
new file mode 100644
index 00000000000..e96eb573c1f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc
@@ -0,0 +1,21 @@
+#
+# A helper include file for prefix index index-only query tests
+#
+# Parameters:
+# $prefix_index_check_title - title of the test
+# $prefix_index_check_query - test query
+# $prefix_index_check_read_avoided_delta - expected change of
+# 'rocksdb_covered_secondary_key_lookups' status variable
+# value after running the query
+
+--let $show_count_statement = show status like 'rocksdb_covered_secondary_key_lookups'
+
+--echo # $prefix_index_check_title
+--let $base_count = query_get_value($show_count_statement, Value, 1)
+
+--eval $prefix_index_check_query
+
+--let $count = query_get_value($show_count_statement, Value, 1)
+--let $assert_text= $prefix_index_check_title: $prefix_index_check_read_avoided_delta rocksdb_covered_secondary_key_lookups
+--let $assert_cond= $count - $base_count = $prefix_index_check_read_avoided_delta
+--source include/assert.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/my.cnf b/storage/rocksdb/mysql-test/rocksdb/my.cnf
index 2beaf514cee..6f1722addc5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/my.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb/my.cnf
@@ -10,8 +10,11 @@ explicit-defaults-for-timestamp=1
loose-rocksdb_lock_wait_timeout=1
loose-rocksdb_strict_collation_check=0
+# MariaDB: speed up the tests:
loose-rocksdb-flush-log-at-trx-commit=0
+loose-rocksdb_force_compute_memtable_stats_cachetime=0
+
# The following is to get rid of the harmless
# "Deadlock found when trying to get lock" errors, see MDEV-12285.
log-warnings=1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
index 06452a5437f..7b15ed47d44 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result
@@ -4,20 +4,20 @@ CREATE DATABASE mysqlslap;
USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
# 2PC enabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
## 2PC + durability + single thread
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 1000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 1000 then 'true' else 'false' end
-false
+true
## 2PC + durability + group commit
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end
-false
+true
# 2PC enabled, MyRocks durability disabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
@@ -28,16 +28,16 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa
case when variable_value-@c = 0 then 'true' else 'false' end
true
# 2PC disabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
-false
+true
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
-false
+true
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
index 2aeeda4cfe6..7ed8375b7a0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result
@@ -289,12 +289,16 @@ set global rocksdb_bulk_load=1;
connect con1,localhost,root,,;
# Switch to connection con1
connection con1;
-show global variables like 'rocksdb_bulk_load';
+show global variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load ON
-show session variables like 'rocksdb_bulk_load';
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
+show session variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load ON
+rocksdb_bulk_load_allow_unsorted OFF
+rocksdb_bulk_load_size 1000
CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
INSERT INTO t1 VALUES (1,1);
# Disconnecting on con1
@@ -341,10 +345,11 @@ SET @prior_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
SET global rocksdb_strict_collation_check = off;
SET session rocksdb_merge_combine_read_size = 566;
-SET session rocksdb_merge_buf_size = 336;
-show variables like '%rocksdb_bulk_load%';
+SET session rocksdb_merge_buf_size = 340;
+show variables like 'rocksdb_bulk_load%';
Variable_name Value
rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_unsorted OFF
rocksdb_bulk_load_size 1000
CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
@@ -408,3 +413,46 @@ select 1300 < 1300 * 1.5 as "same";
same
1
DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,3,3);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kbc` (`b`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ALTER TABLE t1 DROP INDEX kbc, ADD INDEX kbc(b,c), ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX kbc;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b varchar(10),
+index kb(b(5))
+) ENGINE = ROCKSDB charset utf8 collate utf8_bin;
+INSERT INTO t1 (a,b) VALUES (1,'1111122222');
+INSERT INTO t1 (a,b) VALUES (2,'2222233333');
+INSERT INTO t1 (a,b) VALUES (3,'3333344444');
+ALTER TABLE t1 DROP INDEX kb, ADD INDEX kb(b(8)), ALGORITHM=INPLACE;
+SELECT * FROM t1 FORCE INDEX(kb);
+a b
+1 1111122222
+2 2222233333
+3 3333344444
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` varchar(10) COLLATE utf8_bin DEFAULT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`(8))
+) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result b/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result
index 9270dca7b1d..f7c4bab685d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result
@@ -79,11 +79,25 @@ INSERT INTO t1 (a, b) VALUES (1, 5);
INSERT INTO t1 (a, b) VALUES (2, 6);
INSERT INTO t1 (a, b) VALUES (3, 7);
ALTER TABLE t1 ADD UNIQUE INDEX kb(b);
-ERROR HY000: Unique index support is disabled when the table has no primary key.
+INSERT INTO t1 (a, b) VALUES (4, 8);
+INSERT INTO t1 (a, b) VALUES (5, 5);
+ERROR 23000: Duplicate entry '5' for key 'kb'
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` int(11) DEFAULT NULL,
- `b` int(11) DEFAULT NULL
+ `b` int(11) DEFAULT NULL,
+ UNIQUE KEY `kb` (`b`)
) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,2,2);
+ALTER TABLE t1 DROP INDEX kbc, ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE;
+ERROR 23000: Duplicate entry '2-2' for key 'kbc'
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result
index 34a14ff39d8..d86792a6469 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result
@@ -229,7 +229,20 @@ Table Op Msg_type Msg_text
test.t1 check status OK
DROP TABLE t1, t2;
CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=rocksdb;
-ERROR HY000: Unique index support is disabled when the table has no primary key.
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+INSERT INTO t1 (a,b) VALUES (1,'c');
+ERROR 23000: Duplicate entry '1' for key 'a'
+SELECT * FROM t1;
+a b
+1 a
+2 b
+SELECT * FROM t1 WHERE a = 2;
+a b
+2 b
+EXPLAIN SELECT * FROM t1 WHERE a = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 const a a 5 const 1 NULL
+DROP TABLE t1;
CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
SHOW CREATE TABLE t1;
Table Create Table
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result
index be93cf2eead..22034d0fbba 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result
@@ -815,6 +815,404 @@ id4 int not null,
id5 int not null,
value bigint,
value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+create table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_short_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+true
+drop table if exists t1;
+drop table if exists t2;
+create table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
primary key (id1, id2, id3, id4) COMMENT 'cf_long_prefix',
index id2 (id2) COMMENT 'cf_long_prefix',
index id2_id1 (id2, id1) COMMENT 'cf_long_prefix',
@@ -839,6 +1237,404 @@ index id2_id4_id5 (id2, id4, id5) COMMENT 'cf_long_prefix',
index id3_id4 (id3, id4) COMMENT 'cf_long_prefix',
index id3_id5 (id3, id5) COMMENT 'cf_long_prefix'
) engine=ROCKSDB;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+true
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+drop table if exists t1;
+drop table if exists t2;
+create table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
+create table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_long_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
insert t1
select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
from seq_1_to_10000;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result
index 18f007be4b2..b3336aec6ea 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result
@@ -815,6 +815,404 @@ id4 int not null,
id5 int not null,
value bigint,
value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+create table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_short_prefix',
+index id2 (id2) COMMENT 'rev:cf_short_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_short_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_short_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_short_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_short_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_short_prefix'
+) engine=ROCKSDB;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+drop table if exists t1;
+drop table if exists t2;
+create table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
primary key (id1, id2, id3, id4) COMMENT 'cf_long_prefix',
index id2 (id2) COMMENT 'cf_long_prefix',
index id2_id1 (id2, id1) COMMENT 'cf_long_prefix',
@@ -839,6 +1237,404 @@ index id2_id4_id5 (id2, id4, id5) COMMENT 'cf_long_prefix',
index id3_id4 (id3, id4) COMMENT 'cf_long_prefix',
index id3_id5 (id3, id5) COMMENT 'cf_long_prefix'
) engine=ROCKSDB;
+call bloom_start();
+select count(*) from t1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(PRIMARY) where id1 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2 >= 1;
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index(id3_id4) where id3 >= '1';
+count(*)
+10000
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=2 and id1=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=24 and id1=12;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=88 and id1=44;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=100 and id1=50;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index(id2_id1) where id2=428 and id1=214;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=1 and id4=1 and id5=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=23 and id4=115 and id5=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=500 and id4=2500 and id5=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4_id5) where id2=601 and id4=3005 and id5=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id4) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=1 and id3='1' and id1=1 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36 and id3='36' and id1=18 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124' and id1=62 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=888 and id3='888' and id1=444 order by id4;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=124 and id3='124';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=1 and id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2_id3) where id2=12 and id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=1 and id3='1';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id3_id2) where id2=23 and id3='23';
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=1;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=12;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=23;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=100;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (PRIMARY) where id1=234;
+count(*)
+10
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=36;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2_id3_id1_id4) where id2=234;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=1 and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=23 and id4=115;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=500 and id4=2500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id2) where id2=601 and id4=3005;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='1' and id4=1;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id4) where id3='12' and id4=60;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=1;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=23;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=345;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t1 force index (id2) where id2=456;
+count(*)
+5
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='100' and id5=500;
+count(*)
+1
+call bloom_end();
+checked
+false
+call bloom_start();
+select count(*) from t2 force index (id3_id5) where id3='240' and id5=1200;
+count(*)
+1
+call bloom_end();
+checked
+false
+drop table if exists t1;
+drop table if exists t2;
+create table t1 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id1, id2, id3, id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id1 (id2, id1) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id3_id1_id4 (id2, id3, id1, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id2 (id3, id2) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
+create table t2 (
+id1 bigint not null,
+id2 bigint not null,
+id3 varchar(100) not null,
+id4 int not null,
+id5 int not null,
+value bigint,
+value2 varchar(100),
+primary key (id4) COMMENT 'rev:cf_long_prefix',
+index id2 (id2) COMMENT 'rev:cf_long_prefix',
+index id2_id3 (id2, id3) COMMENT 'rev:cf_long_prefix',
+index id2_id4 (id2, id4) COMMENT 'rev:cf_long_prefix',
+index id2_id4_id5 (id2, id4, id5) COMMENT 'rev:cf_long_prefix',
+index id3_id4 (id3, id4) COMMENT 'rev:cf_long_prefix',
+index id3_id5 (id3, id5) COMMENT 'rev:cf_long_prefix'
+) engine=ROCKSDB;
insert t1
select (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc"
from seq_1_to_10000;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
index d75355f599f..15f2e3f42f7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result
@@ -1,9 +1,27 @@
DROP TABLE IF EXISTS t1, t2, t3;
-CREATE TABLE t1(pk CHAR(5) PRIMARY KEY, a char(30), b char(30), key(a)) COLLATE 'latin1_bin';
-CREATE TABLE t2(pk CHAR(5) PRIMARY KEY, a char(30), b char(30), key(a)) COLLATE 'latin1_bin';
-CREATE TABLE t3(pk CHAR(5) PRIMARY KEY, a char(30), b char(30), key(a)) COLLATE 'latin1_bin'
- PARTITION BY KEY() PARTITIONS 4;
-connect other,localhost,root,,;
+Data will be ordered in ascending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+connect other,localhost,root,,
set session transaction isolation level repeatable read;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
STAT_TYPE VALUE
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
new file mode 100644
index 00000000000..be198d02aaf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result
@@ -0,0 +1,45 @@
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(10);
+INSERT INTO t1 VALUES(11);
+INSERT INTO t1 VALUES(9);
+ERROR HY000: Rows must be inserted in primary key order during bulk load operation
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+10
+11
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+SET rocksdb_bulk_load=0;
+ERROR HY000: Lost connection to MySQL server during query
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+99
+100
+101
+TRUNCATE TABLE t1;
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+INSERT INTO t1 VALUES(201);
+ERROR 23000: Failed to insert the record: the key already exists
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+pk
+200
+201
+202
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
new file mode 100644
index 00000000000..a30838b9c9f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result
@@ -0,0 +1,82 @@
+DROP TABLE IF EXISTS t1, t2, t3;
+Data will be ordered in ascending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+select count(pk) from t1;
+count(pk)
+5000000
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(pk) from t2;
+count(pk)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(pk) from t3;
+count(pk)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+longfilenamethatvalidatesthatthiswillgetdeleted.bulk_load.tmp
+test.bulk_load.tmp
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
new file mode 100644
index 00000000000..4d259b5ea2f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -0,0 +1,82 @@
+DROP TABLE IF EXISTS t1, t2, t3;
+Data will be ordered in descending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "rev:cf1",
+KEY(a)
+) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+select count(pk) from t1;
+count(pk)
+5000000
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(pk) from t2;
+count(pk)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(pk) from t3;
+count(pk)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+longfilenamethatvalidatesthatthiswillgetdeleted.bulk_load.tmp
+test.bulk_load.tmp
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
new file mode 100644
index 00000000000..d2d3befdf04
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result
@@ -0,0 +1,82 @@
+DROP TABLE IF EXISTS t1, t2, t3;
+Data will be ordered in descending order
+CREATE TABLE t1(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t2(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) COLLATE 'latin1_bin';
+CREATE TABLE t3(
+pk CHAR(5),
+a CHAR(30),
+b CHAR(30),
+PRIMARY KEY(pk) COMMENT "cf1",
+KEY(a)
+) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+select count(pk) from t1;
+count(pk)
+5000000
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(pk) from t2;
+count(pk)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(pk) from t3;
+count(pk)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+longfilenamethatvalidatesthatthiswillgetdeleted.bulk_load.tmp
+test.bulk_load.tmp
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
new file mode 100644
index 00000000000..31509227279
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result
@@ -0,0 +1,101 @@
+DROP TABLE IF EXISTS t1;
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1;
+a b
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+a b
+-3 5
+-1 3
+2 0
+4 -2
+6 -4
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1", KEY(b));
+SET rocksdb_bulk_load=1;
+SELECT * FROM t1;
+a b
+6 -4
+4 -2
+2 0
+-1 3
+-3 5
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1;
+a b
+1 1
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2;
+a b
+1 1
+SELECT * FROM t1;
+a b
+1 1
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+a b
+1 1
+2 2
+DROP TABLE t1, t2;
+CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1");
+CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "cf1");
+CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "cf1")
+PARTITION BY KEY() PARTITIONS 4;
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 0
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+STAT_TYPE VALUE
+DB_NUM_SNAPSHOTS 1
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+LOAD DATA INFILE <input_file> INTO TABLE t1;
+LOAD DATA INFILE <input_file> INTO TABLE t2;
+LOAD DATA INFILE <input_file> INTO TABLE t3;
+set rocksdb_bulk_load=0;
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+ANALYZE TABLE t1, t2, t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+test.t2 analyze status OK
+test.t3 analyze status OK
+SHOW TABLE STATUS WHERE name LIKE 't%';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+select count(a) from t1;
+count(a)
+5000000
+select count(b) from t1;
+count(b)
+5000000
+select count(a) from t2;
+count(a)
+5000000
+select count(b) from t2;
+count(b)
+5000000
+select count(a) from t3;
+count(a)
+5000000
+select count(b) from t3;
+count(b)
+5000000
+DROP TABLE t1, t2, t3;
+SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
new file mode 100644
index 00000000000..d8e5b92e897
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result
@@ -0,0 +1,4 @@
+SET rocksdb_bulk_load=1;
+SET rocksdb_bulk_load_allow_unsorted=1;
+ERROR HY000: Error when executing command SET: Cannot change this setting while bulk load is enabled
+SET rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
new file mode 100644
index 00000000000..d9d29e6ac69
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result
@@ -0,0 +1,10 @@
+DROP TABLE IF EXISTS stats_test_table;
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+SET GLOBAL rocksdb_perf_context_level=3;
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+io_write_bytes > 0
+1
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
index d75a548e6ff..7f1e3d8e53f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result
@@ -5,7 +5,7 @@ connection con1;
CREATE TABLE t1 (a INT, pk INT AUTO_INCREMENT PRIMARY KEY) ENGINE=ROCKSDB;
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1105
+ERROR: 12048
connection con2;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
STAT_TYPE VALUE
@@ -18,7 +18,7 @@ STAT_TYPE VALUE
DB_NUM_SNAPSHOTS 0
connection con1;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1105
+ERROR: 12048
connection con2;
INSERT INTO t1 (a) VALUES (1);
connection con1;
@@ -69,7 +69,7 @@ id value value2
5 5 5
6 6 6
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1105
+ERROR: 12048
connection con2;
INSERT INTO r1 values (7,7,7);
connection con1;
@@ -107,12 +107,12 @@ id value value2
7 7 7
8 8 8
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1105
+ERROR: 12048
connection con2;
INSERT INTO r1 values (9,9,9);
connection con1;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1105
+ERROR: 12048
connection con2;
INSERT INTO r1 values (10,10,10);
connection con1;
@@ -129,7 +129,7 @@ id value value2
9 9 9
10 10 10
START TRANSACTION WITH CONSISTENT SNAPSHOT;
-ERROR: 1105
+ERROR: 12048
INSERT INTO r1 values (11,11,11);
ERROR: 0
SELECT * FROM r1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
index 7458e6b72c3..90723ff762c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result
@@ -125,7 +125,7 @@ id value value2
START TRANSACTION WITH CONSISTENT SNAPSHOT;
ERROR: 0
INSERT INTO r1 values (11,11,11);
-ERROR: 1105
+ERROR: 12045
SELECT * FROM r1;
id value value2
1 1 1
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
new file mode 100644
index 00000000000..195215331b8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result
@@ -0,0 +1,73 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Eligible for optimization, access via fake_id only
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# No longer eligible for optimization since no covered bitmap was stored.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031;
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [No longer eligible for optimization since no covered bitmap was stored.: 0 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033;
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
new file mode 100644
index 00000000000..39130475349
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result
@@ -0,0 +1,1009 @@
+##
+## Using the system variable high_priority_ddl"
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 rename t1_new;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop table t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create index idx1 on t1 (i);;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop index idx1 on t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate t1;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop trigger ins_sum;;
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 1;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+set high_priority_ddl = 1;
+select @@high_priority_ddl;
+@@high_priority_ddl
+1
+lock tables t1 write;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+set high_priority_ddl = 0;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
+##
+## Using HIGH_PRIORITY syntax
+##
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = alter table t1 rename t1_new;
+## high_priority_cmd = alter high_priority table t1 rename t1_new;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+begin; insert into t1 values (4); select i from t1;;
+i
+1
+2
+3
+4
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+alter table t1 rename t1_new;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 rename t1_new;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+select * from t1_new;
+i
+1
+2
+3
+drop table t1_new;
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 write;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop table t1;
+## high_priority_cmd = drop high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read; begin; insert into t1 values (4);;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop table t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority table t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = con2
+## cmd = alter table t1 modify i bigint;
+## high_priority_cmd = alter high_priority table t1 modify i bigint;
+## should_kill = 0
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: con2
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and con2 exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: con2
+alter table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+alter high_priority table t1 modify i bigint;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user2 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create index idx1 on t1 (i);
+## high_priority_cmd = create high_priority index idx1 on t1 (i);
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create index idx1 on t1 (i);;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority index idx1 on t1 (i);;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop index idx1 on t1;
+## high_priority_cmd = drop high_priority index idx1 on t1;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES MUL NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop index idx1 on t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority index idx1 on t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = truncate t1;
+## high_priority_cmd = truncate high_priority t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+truncate t1;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+truncate high_priority t1;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 1
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = drop trigger ins_sum;
+## high_priority_cmd = drop high_priority trigger ins_sum;
+## should_kill = 1
+## recreate_table = 0
+## throw_error = 1
+
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+drop trigger ins_sum;;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+drop high_priority trigger ins_sum;;
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+
+## Test parameters:
+## use_sys_var = 0;
+## con_block = con1
+## con_kill = default
+## cmd = optimize table t1;
+## high_priority_cmd = optimize high_priority table t1;
+## should_kill = 1
+## recreate_table = 1
+## throw_error = 0
+
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection: con1
+lock tables t1 read;;
+connection: default
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+describe t1;
+Field Type Null Key Default Extra
+i int(11) YES NULL
+connection: default (for show processlist)
+# both con1 and default exist
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+<Id> test_user1 <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+connection: default
+optimize table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize Error Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: test.t1
+test.t1 optimize status Operation failed
+optimize high_priority table t1;;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+connection: default (for show processlist)
+show processlist;
+Id User Host db Command Time State Info Rows examined Rows sent Tid Srv_Id
+<Id> root <Host> test <Command> <Time> <State> <Info> <RExam> <RSent> <TID> 0
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+drop table if exists t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result
new file mode 100644
index 00000000000..9b62cade9ca
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result
@@ -0,0 +1,51 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout; set @prior_deadlock_detect = @@rocksdb_deadlock_detect; set global rocksdb_deadlock_detect = on; set global rocksdb_lock_wait_timeout = 100000;;
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+row_lock_deadlocks
+1
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+row_lock_deadlocks
+1
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+row_lock_deadlocks
+2
+select row_lock_deadlocks from information_schema.table_statistics where
+table_name = "t";
+row_lock_deadlocks
+2
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout; set global rocksdb_deadlock_detect = @prior_deadlock_detect;;
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
new file mode 100644
index 00000000000..d7cb89becb7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result
@@ -0,0 +1,490 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #1
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #2
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 1;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 for update;
+i
+3
+select * from t where i=2 for update;
+select * from t where i=3 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+3
+rollback;
+i
+2
+rollback;
+set global rocksdb_max_latest_deadlocks = 5;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #5
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=100 for update;
+i
+select * from t where i=101 for update;
+i
+select * from t where i=2 for update;
+select * from t where i=3 lock in share mode;
+i
+3
+select * from t where i=200 for update;
+i
+select * from t where i=201 for update;
+i
+select * from t where i=1 lock in share mode;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: PRIMARY
+TABLE NAME: test.t
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+
+*** DEADLOCK PATH
+=========================================
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: SHARED
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+---------------WAITING FOR---------------
+TXN_ID
+COLUMN FAMILY NAME: default
+KEY
+LOCK TYPE: EXCLUSIVE
+INDEX NAME: NOT FOUND; IDX_ID
+TABLE NAME: NOT FOUND; IDX_ID
+
+--------TXN_ID GOT DEADLOCK---------
+
+-------DEADLOCK EXCEEDED MAX DEPTH-------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
index fbe6f35126d..dba49d8ff41 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table.result
@@ -5,6 +5,9 @@ DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
@@ -54,20 +57,4 @@ drop table t5;
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
-Begin filtering dropped index+ 0
-Begin filtering dropped index+ 1
-Begin filtering dropped index+ 1
-Begin filtering dropped index+ 1
-Begin filtering dropped index+ 1
-Begin filtering dropped index+ 1
-Begin filtering dropped index+ 1
-Begin filtering dropped index+ 1
-Finished filtering dropped index+ 0
-Finished filtering dropped index+ 1
-Finished filtering dropped index+ 1
-Finished filtering dropped index+ 1
-Finished filtering dropped index+ 1
-Finished filtering dropped index+ 1
-Finished filtering dropped index+ 1
-Finished filtering dropped index+ 1
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
index 83d9fd9493f..fad2939d206 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result
@@ -5,6 +5,8 @@ DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
@@ -47,9 +49,15 @@ primary key (a,b) comment 'cf1',
key (b) comment 'rev:cf2'
) ENGINE=RocksDB;
DELETE FROM t5;
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
drop table t1;
drop table t2;
drop table t3;
drop table t4;
drop table t5;
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
Compacted
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
index e5237fe9b1e..7a33fa83cb4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result
@@ -1,6 +1,8 @@
call mtr.add_suppression("Column family 'cf1' not found");
call mtr.add_suppression("Column family 'rev:cf2' not found");
DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
index 954335debf2..b4cebb08bb1 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result
@@ -178,16 +178,20 @@ id1 id2 id3
9 17 9
DROP TABLE t1;
DROP TABLE t2;
+set global rocksdb_large_prefix=1;
CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+set global rocksdb_large_prefix=1;
CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
INSERT INTO t1 VALUES (1, 1, 1) ON DUPLICATE KEY UPDATE id2 = 9;
SELECT * FROM t1 WHERE id1 = 1;
id1 id2 id3
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
new file mode 100644
index 00000000000..963f9706ee8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result
@@ -0,0 +1,80 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+fake_id INT,
+bigfield VARCHAR(4096),
+PRIMARY KEY (id),
+KEY bf (bigfield(32)),
+KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+(8, 1008, REPEAT('b', 8)),
+(24, 1024, REPEAT('c', 24)),
+(31, 1031, REPEAT('d', 31)),
+(32, 1032, REPEAT('x', 32)),
+(33, 1033, REPEAT('y', 33)),
+(128, 1128, REPEAT('z', 128));
+SELECT * FROM t1;
+id fake_id bigfield
+1 1001 a
+8 1008 bbbbbbbb
+24 1024 cccccccccccccccccccccccc
+31 1031 ddddddddddddddddddddddddddddddd
+32 1032 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+33 1033 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+128 1128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+# Baseline sanity check
+no-op query
+no-op query
+include/assert.inc [Baseline sanity check: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization, access via fake_id only
+id bigfield
+31 ddddddddddddddddddddddddddddddd
+include/assert.inc [Eligible for optimization, access via fake_id only: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization, access via fake_id of big row.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization, access via fake_id of big row.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+32 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+include/assert.inc [Eligible for optimization.: 1 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+33 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+8 bbbbbbbb
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Eligible for optimization.
+id bigfield
+24 cccccccccccccccccccccccc
+include/assert.inc [Eligible for optimization.: 2 rocksdb_covered_secondary_key_lookups]
+# Not eligible for optimization.
+id bigfield
+128 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+include/assert.inc [Not eligible for optimization.: 0 rocksdb_covered_secondary_key_lookups]
+#
+# Test that multi-byte charsets are handled correctly
+#
+# Charset record obviously shorter than the prefix
+a b
+1 a
+include/assert.inc [Charset record obviously shorter than the prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record shorter than prefix
+a b
+2 cc
+include/assert.inc [Charset record shorter than prefix: 2 rocksdb_covered_secondary_key_lookups]
+# Charset record with glyphs shorter than prefix
+a b
+3 ŽŽ
+include/assert.inc [Charset record with glyphs shorter than prefix: 1 rocksdb_covered_secondary_key_lookups]
+# Charset record longer than prefix
+a b
+4 žžžž
+include/assert.inc [Charset record longer than prefix: 0 rocksdb_covered_secondary_key_lookups]
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result b/storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result
index 483be726bb3..5ffd2774ca2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result
@@ -1,16 +1,16 @@
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (b INT PRIMARY KEY);
CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, FOREIGN KEY (b) REFERENCES t1(b));
-ERROR 42000: MyRocks does not currently support foreign key constraints
+ERROR 42000: This version of MySQL doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
CREATE TABLE t2 (a INT NOT NULL, bforeign INT NOT NULL);
DROP TABLE t2;
CREATE TABLE t2 (a INT NOT NULL, foreignkey INT NOT NULL);
DROP TABLE t2;
CREATE TABLE t2 (a INT NOT NULL, bforeign INT not null, FOREIGN KEY (bforeign) REFERENCES t1(b));
-ERROR 42000: MyRocks does not currently support foreign key constraints
+ERROR 42000: This version of MySQL doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL);
ALTER TABLE t2 ADD FOREIGN KEY (b) REFERENCES t1(b);
-ERROR 42000: MyRocks does not currently support foreign key constraints
+ERROR 42000: This version of MySQL doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
DROP TABLE t2;
CREATE TABLE t2 (a INT NOT NULL);
ALTER TABLE t2 ADD bforeign INT NOT NULL;
@@ -20,6 +20,6 @@ ALTER TABLE t2 ADD foreignkey INT NOT NULL;
DROP TABLE t2;
CREATE TABLE t2 (a INT NOT NULL);
ALTER TABLE t2 ADD bforeign INT NOT NULL, ADD FOREIGN KEY (bforeign) REFERENCES t1(b);
-ERROR 42000: MyRocks does not currently support foreign key constraints
+ERROR 42000: This version of MySQL doesn't yet support 'FOREIGN KEY for the RocksDB storage engine'
DROP TABLE t2;
DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/hermitage.result b/storage/rocksdb/mysql-test/rocksdb/r/hermitage.result
index e4d080289dc..8bf2416aa78 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/hermitage.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/hermitage.result
@@ -483,7 +483,7 @@ delete from test where value = 20;
connection con1;
commit;
connection con2;
-ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors';
variable_value-@a
1
@@ -511,7 +511,7 @@ update test set value = 12 where id = 1;
connection con1;
commit;
connection con2;
-ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
commit;
connection con1;
truncate table test;
@@ -582,7 +582,7 @@ update test set value = 18 where id = 2;
commit;
connection con1;
delete from test where value = 20;
-ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
commit;
connection con1;
truncate table test;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
index b37b0d0b72d..bcb1cdd4503 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result
@@ -8,9 +8,9 @@ PRIMARY KEY (z, y) COMMENT 'zy_cf',
KEY (x)) ENGINE = ROCKSDB;
SELECT TABLE_SCHEMA,TABLE_NAME,PARTITION_NAME,INDEX_NAME,INDEX_TYPE,KV_FORMAT_VERSION,CF FROM INFORMATION_SCHEMA.ROCKSDB_DDL WHERE TABLE_NAME like 'is_ddl_t%';
TABLE_SCHEMA TABLE_NAME PARTITION_NAME INDEX_NAME INDEX_TYPE KV_FORMAT_VERSION CF
-test is_ddl_t1 NULL PRIMARY 1 11 default
-test is_ddl_t1 NULL j 2 11 default
-test is_ddl_t1 NULL k 2 11 kl_cf
+test is_ddl_t1 NULL PRIMARY 1 13 default
+test is_ddl_t1 NULL j 2 13 default
+test is_ddl_t1 NULL k 2 13 kl_cf
test is_ddl_t2 NULL PRIMARY 1 11 zy_cf
test is_ddl_t2 NULL x 2 11 default
DROP TABLE is_ddl_t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index.result b/storage/rocksdb/mysql-test/rocksdb/r/index.result
index 99390c8ceb2..b5950618285 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index.result
@@ -40,6 +40,33 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
#
# Issue #376: MyRocks: ORDER BY optimizer is unable to use the index extension
#
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
new file mode 100644
index 00000000000..22c8592ff28
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result
@@ -0,0 +1,48 @@
+CREATE TABLE t1
+(
+/* fields/keys for row retrieval tests */
+key1 INT,
+key2 INT,
+key3 INT,
+key4 INT,
+/* make rows much bigger then keys */
+filler1 CHAR(200),
+KEY(key1),
+KEY(key2)
+) ENGINE=ROCKSDB;
+CREATE TABLE t0 AS SELECT * FROM t1;
+# Printing of many insert into t0 values (....) disabled.
+# Printing of many insert into t1 select .... from t0 disabled.
+# Printing of many insert into t1 (...) values (....) disabled.
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7201
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL # Using intersect(key1,key2); Using where
+UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
+DROP TABLE t0, t1;
+create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb;
+insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300);
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+set global rocksdb_force_flush_memtable_now=1;
+explain select * from t1 where key1 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 5 const # NULL
+explain select key1,key2 from t1 where key1 = 1 or key2 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL # Using union(key1,key2); Using where
+select * from t1 where key1 = 1;
+key1 key2 key3
+1 100 100
+1 200 200
+1 300 300
+select key1,key2 from t1 where key1 = 1 or key2 = 1;
+key1 key2
+1 100
+1 200
+1 300
+drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result
new file mode 100644
index 00000000000..eab9bbc2ea7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result
@@ -0,0 +1,1419 @@
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- Index merge test 1 -------------------------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t0, t1, t2, t3, t4;
+create table t0
+(
+key1 int not null,
+key2 int not null,
+key3 int not null,
+key4 int not null,
+key5 int not null,
+key6 int not null,
+key7 int not null,
+key8 int not null,
+INDEX i1(key1),
+INDEX i2(key2),
+INDEX i3(key3),
+INDEX i4(key4),
+INDEX i5(key5),
+INDEX i6(key6),
+INDEX i7(key7),
+INDEX i8(key8)
+);
+analyze table t0;
+Table Op Msg_type Msg_text
+test.t0 analyze status OK
+explain select * from t0 where key1 < 3 or key1 > 1020;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1 i1 4 NULL 2 Using index condition
+explain
+select * from t0 where key1 < 3 or key2 > 1020;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL 2 Using sort_union(i1,i2); Using where
+select * from t0 where key1 < 3 or key2 > 1020;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+1021 1021 1021 1021 1021 1021 1021 3
+1022 1022 1022 1022 1022 1022 1022 2
+1023 1023 1023 1023 1023 1023 1023 1
+1024 1024 1024 1024 1024 1024 1024 0
+explain select * from t0 where key1 < 2 or key2 <3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain
+select * from t0 where (key1 > 30 and key1<35) or (key2 >32 and key2 < 40);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+select * from t0 where (key1 > 30 and key1<35) or (key2 >32 and key2 < 40);
+key1 key2 key3 key4 key5 key6 key7 key8
+31 31 31 31 31 31 31 993
+32 32 32 32 32 32 32 992
+33 33 33 33 33 33 33 991
+34 34 34 34 34 34 34 990
+35 35 35 35 35 35 35 989
+36 36 36 36 36 36 36 988
+37 37 37 37 37 37 37 987
+38 38 38 38 38 38 38 986
+39 39 39 39 39 39 39 985
+explain select * from t0 ignore index (i2) where key1 < 3 or key2 <4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1 NULL NULL NULL # Using where
+explain select * from t0 where (key1 < 3 or key2 <4) and key3 = 50;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i1,i2,i3 i3 4 const # Using where
+explain select * from t0 use index (i1,i2) where (key1 < 2 or key2 <3) and key3 = 50;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where (key1 > 1 or key2 > 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1,i2 NULL NULL NULL # Using where
+explain select * from t0 force index (i1,i2) where (key1 > 1 or key2 > 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain
+select * from t0 where key1<2 or key2<3 or (key1>5 and key1<7) or
+(key1>10 and key1<12) or (key2>100 and key2<102);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where key2 = 45 or key1 <=> null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1,i2 i2 4 NULL # Using where
+explain select * from t0 where key2 = 45 or key1 is not null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1,i2 NULL NULL NULL # Using where
+explain select * from t0 where key2 = 45 or key1 is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i2 i2 4 const # NULL
+explain select * from t0 where key2=10 or key3=3 or key4 <=> null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i2,i3,i4 i2,i3 4,4 NULL # Using union(i2,i3); Using where
+explain select * from t0 where key2=10 or key3=3 or key4 is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i2,i3 i2,i3 4,4 NULL # Using union(i2,i3); Using where
+explain select key1 from t0 where (key1 <=> null) or (key2 < 2) or
+(key3=10) or (key4 <=> null);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i4 i2,i3 4,4 NULL # Using sort_union(i2,i3); Using where
+explain select key1 from t0 where (key1 <=> null) or (key1 < 5) or
+(key3=10) or (key4 <=> null);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i3,i4 i1,i3 4,4 NULL # Using sort_union(i1,i3); Using where
+explain select * from t0 where
+(key1 < 2 or key2 < 2) and (key3 < 3 or key4 < 3) and (key5 < 5 or key6 < 5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain
+select * from t0 where (key1 < 2 or key2 < 4) and (key1 < 5 or key3 < 3);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+select * from t0 where (key1 < 2 or key2 < 4) and (key1 < 5 or key3 < 3);
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+3 3 3 3 3 3 3 1021
+explain select * from t0 where
+(key1 < 3 or key2 < 2) and (key3 < 3 or key4 < 3) and (key5 < 2 or key6 < 2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i4,i5,i6 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where
+(key1 < 3 or key2 < 3) and (key3 < 70);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1,i2,i3 i3 4 NULL # Using index condition; Using where
+explain select * from t0 where
+(key1 < 3 or key2 < 3) and (key3 < 1000);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+explain select * from t0 where
+((key1 < 3 or key2 < 3) and (key2 <4 or key3 < 3))
+or
+key2 > 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1,i2,i3 NULL NULL NULL # Using where
+explain select * from t0 where
+((key1 < 4 or key2 < 4) and (key2 <4 or key3 < 3))
+or
+key1 < 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+select * from t0 where
+((key1 < 4 or key2 < 4) and (key2 <4 or key3 < 3))
+or
+key1 < 5;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+3 3 3 3 3 3 3 1021
+4 4 4 4 4 4 4 1020
+explain select * from t0 where
+((key1 < 2 or key2 < 2) and (key3 <4 or key5 < 3))
+or
+((key5 < 3 or key6 < 3) and (key7 <3 or key8 < 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6,i7,i8 i1,i2,i5,i6 4,4,4,4 NULL # Using sort_union(i1,i2,i5,i6); Using where
+explain select * from t0 where
+((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 3))
+or
+((key7 <5 or key8 < 3) and (key5 < 4 or key6 < 4));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6,i7,i8 i3,i5,i7,i8 4,4,4,4 NULL # Using sort_union(i3,i5,i7,i8); Using where
+explain select * from t0 where
+((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 4))
+or
+((key3 <4 or key5 < 2) and (key5 < 5 or key6 < 3));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6 i3,i5 4,4 NULL # Using sort_union(i3,i5); Using where
+explain select * from t0 where
+((key3 <4 or key5 < 3) and (key1 < 3 or key2 < 3))
+or
+(((key3 <5 and key7 < 5) or key5 < 2) and (key5 < 4 or key6 < 4));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6,i7 i3,i5 4,4 NULL # Using sort_union(i3,i5); Using where
+explain select * from t0 where
+((key3 <5 or key5 < 4) and (key1 < 4 or key2 < 4))
+or
+((key3 >5 or key5 < 2) and (key5 < 5 or key6 < 6));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1,i2,i3,i5,i6 NULL NULL NULL # Using where
+explain select * from t0 force index(i1, i2, i3, i4, i5, i6 ) where
+((key3 <3 or key5 < 4) and (key1 < 3 or key2 < 3))
+or
+((key3 >4 or key5 < 2) and (key5 < 5 or key6 < 4));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2,i3,i5,i6 i3,i5 4,4 NULL # Using sort_union(i3,i5); Using where
+explain select * from t0 force index(i1, i2, i3, i4, i5, i6 ) where
+((key3 <5 or key5 < 4) and (key1 < 4 or key2 < 4))
+or
+((key3 >=5 or key5 < 2) and (key5 < 5 or key6 < 6));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL i1,i2,i3,i5,i6 NULL NULL NULL # Using where
+select * from t0 where key1 < 3 or key8 < 2 order by key1;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+1023 1023 1023 1023 1023 1023 1023 1
+1024 1024 1024 1024 1024 1024 1024 0
+explain
+select * from t0 where key1 < 3 or key8 < 2 order by key1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i8 i1,i8 4,4 NULL # Using sort_union(i1,i8); Using where; Using filesort
+create table t2 like t0;
+insert into t2 select * from t0;
+alter table t2 add index i1_3(key1, key3);
+alter table t2 add index i2_3(key2, key3);
+alter table t2 drop index i1;
+alter table t2 drop index i2;
+alter table t2 add index i321(key3, key2, key1);
+explain select key3 from t2 where key1 = 100 or key2 = 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge i1_3,i2_3 i1_3,i2_3 4,4 NULL # Using sort_union(i1_3,i2_3); Using where
+explain select key3 from t2 where key1 <100 or key2 < 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge i1_3,i2_3 i1_3,i2_3 4,4 NULL # Using sort_union(i1_3,i2_3); Using where
+explain select key7 from t2 where key1 <100 or key2 < 100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge i1_3,i2_3 i1_3,i2_3 4,4 NULL # Using sort_union(i1_3,i2_3); Using where
+create table t4 (
+key1a int not null,
+key1b int not null,
+key2 int not null,
+key2_1 int not null,
+key2_2 int not null,
+key3 int not null,
+index i1a (key1a, key1b),
+index i1b (key1b, key1a),
+index i2_1(key2, key2_1),
+index i2_2(key2, key2_1)
+);
+Warnings:
+Note 1831 Duplicate index 'i2_2' defined on the table 'test.t4'. This is deprecated and will be disallowed in a future release.
+insert into t4 select key1,key1,key1 div 10, key1 % 10, key1 % 10, key1 from t0;
+select * from t4 where key1a = 3 or key1b = 4;
+key1a key1b key2 key2_1 key2_2 key3
+3 3 0 3 3 3
+4 4 0 4 4 4
+explain select * from t4 where key1a = 3 or key1b = 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 index_merge i1a,i1b i1a,i1b 4,4 NULL 2 Using sort_union(i1a,i1b); Using where
+explain select * from t4 where key2 = 1 and (key2_1 = 1 or key3 = 5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref i2_1,i2_2 i2_1 4 const 1 Using where
+explain select * from t4 where key2 = 1 and (key2_1 = 1 or key2_2 = 5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ref i2_1,i2_2 i2_1 4 const 1 Using where
+explain select * from t4 where key2_1 = 1 or key2_2 = 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t4 ALL NULL NULL NULL NULL # Using where
+create table t1 like t0;
+insert into t1 select * from t0;
+explain select * from t0 left join t1 on (t0.key1=t1.key1)
+where t0.key1=3 or t0.key2=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL 2 Using union(i1,i2); Using where
+1 SIMPLE t1 ref i1 i1 4 test.t0.key1 1 NULL
+select * from t0 left join t1 on (t0.key1=t1.key1)
+where t0.key1=3 or t0.key2=4;
+key1 key2 key3 key4 key5 key6 key7 key8 key1 key2 key3 key4 key5 key6 key7 key8
+3 3 3 3 3 3 3 1021 3 3 3 3 3 3 3 1021
+4 4 4 4 4 4 4 1020 4 4 4 4 4 4 4 1020
+explain
+select * from t0,t1 where (t0.key1=t1.key1) and ( t0.key1=3 or t0.key2=4);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL 2 Using union(i1,i2); Using where
+1 SIMPLE t1 ref i1 i1 4 test.t0.key1 1 NULL
+explain
+select * from t0,t1 where (t0.key1=t1.key1) and
+(t0.key1=3 or t0.key2<4) and t1.key1=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i1,i2 i1 4 const 1 Using where
+1 SIMPLE t1 ref i1 i1 4 const 1 NULL
+explain select * from t0,t1 where t0.key1 = 5 and
+(t1.key1 = t0.key1 or t1.key8 = t0.key1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ref i1 i1 4 const 1 NULL
+1 SIMPLE t1 index_merge i1,i8 i1,i8 4,4 NULL 2 Using union(i1,i8); Using where; Using join buffer (Block Nested Loop)
+explain select * from t0,t1 where t0.key1 < 3 and
+(t1.key1 = t0.key1 or t1.key8 = t0.key1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 range i1 i1 4 NULL # Using index condition
+1 SIMPLE t1 ALL i1,i8 NULL NULL NULL # Range checked for each record (index map: 0x81)
+explain select * from t1 where key1=3 or key2=4
+union select * from t1 where key1<4 or key3=5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index_merge i1,i2 i1,i2 4,4 NULL 2 Using union(i1,i2); Using where
+2 UNION t1 index_merge i1,i3 i1,i3 4,4 NULL 2 Using sort_union(i1,i3); Using where
+NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL Using temporary
+explain select * from (select * from t1 where key1 = 3 or key2 =3) as Z where key8 >5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 Using where
+2 DERIVED t1 index_merge i1,i2 i1,i2 4,4 NULL 2 Using union(i1,i2); Using where
+create table t3 like t0;
+insert into t3 select * from t0;
+alter table t3 add key9 int not null, add index i9(key9);
+alter table t3 add keyA int not null, add index iA(keyA);
+alter table t3 add keyB int not null, add index iB(keyB);
+alter table t3 add keyC int not null, add index iC(keyC);
+update t3 set key9=key1,keyA=key1,keyB=key1,keyC=key1;
+explain select * from t3 where
+key1=1 or key2=2 or key3=3 or key4=4 or
+key5=5 or key6=6 or key7=7 or key8=8 or
+key9=9 or keyA=10 or keyB=11 or keyC=12;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 index_merge i1,i2,i3,i4,i5,i6,i7,i8,i9,iA,iB,iC i1,i2,i3,i4,i5,i6,i7,i8,i9,iA,iB,iC 4,4,4,4,4,4,4,4,4,4,4,4 NULL 12 Using union(i1,i2,i3,i4,i5,i6,i7,i8,i9,iA,iB,iC); Using where
+select * from t3 where
+key1=1 or key2=2 or key3=3 or key4=4 or
+key5=5 or key6=6 or key7=7 or key8=8 or
+key9=9 or keyA=10 or keyB=11 or keyC=12;
+key1 key2 key3 key4 key5 key6 key7 key8 key9 keyA keyB keyC
+1 1 1 1 1 1 1 1023 1 1 1 1
+2 2 2 2 2 2 2 1022 2 2 2 2
+3 3 3 3 3 3 3 1021 3 3 3 3
+4 4 4 4 4 4 4 1020 4 4 4 4
+5 5 5 5 5 5 5 1019 5 5 5 5
+6 6 6 6 6 6 6 1018 6 6 6 6
+7 7 7 7 7 7 7 1017 7 7 7 7
+9 9 9 9 9 9 9 1015 9 9 9 9
+10 10 10 10 10 10 10 1014 10 10 10 10
+11 11 11 11 11 11 11 1013 11 11 11 11
+12 12 12 12 12 12 12 1012 12 12 12 12
+1016 1016 1016 1016 1016 1016 1016 8 1016 1016 1016 1016
+explain select * from t0 where key1 < 3 or key2 < 4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 index_merge i1,i2 i1,i2 4,4 NULL 2 Using sort_union(i1,i2); Using where
+select * from t0 where key1 < 3 or key2 < 4;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 1023
+2 2 2 2 2 2 2 1022
+3 3 3 3 3 3 3 1021
+update t0 set key8=123 where key1 < 3 or key2 < 4;
+select * from t0 where key1 < 3 or key2 < 4;
+key1 key2 key3 key4 key5 key6 key7 key8
+1 1 1 1 1 1 1 123
+2 2 2 2 2 2 2 123
+3 3 3 3 3 3 3 123
+delete from t0 where key1 < 3 or key2 < 4;
+select * from t0 where key1 < 3 or key2 < 4;
+key1 key2 key3 key4 key5 key6 key7 key8
+select count(*) from t0;
+count(*)
+1021
+drop table t4;
+create table t4 (a int);
+insert into t4 values (1),(4),(3);
+set @save_join_buffer_size=@@join_buffer_size;
+set join_buffer_size= 4096;
+explain select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 < 500000 or A.key2 < 3)
+and (B.key1 < 500000 or B.key2 < 3);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE A index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where
+1 SIMPLE B index_merge i1,i2 i1,i2 4,4 NULL # Using sort_union(i1,i2); Using where; Using join buffer (Block Nested Loop)
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 < 500000 or A.key2 < 3)
+and (B.key1 < 500000 or B.key2 < 3);
+max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+10240
+update t0 set key1=1;
+explain select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 = 1 or A.key2 = 1)
+and (B.key1 = 1 or B.key2 = 1);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE A index_merge i1,i2 i1,i2 4,4 NULL # Using union(i1,i2); Using where
+1 SIMPLE B index_merge i1,i2 i1,i2 4,4 NULL # Using union(i1,i2); Using where; Using join buffer (Block Nested Loop)
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A force index(i1,i2), t0 as B force index (i1,i2)
+where (A.key1 = 1 or A.key2 = 1)
+and (B.key1 = 1 or B.key2 = 1);
+max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+8194
+alter table t0 add filler1 char(200), add filler2 char(200), add filler3 char(200);
+update t0 set key2=1, key3=1, key4=1, key5=1,key6=1,key7=1 where key7 < 500;
+select max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+from t0 as A, t0 as B
+where (A.key1 = 1 and A.key2 = 1 and A.key3 = 1 and A.key4=1 and A.key5=1 and A.key6=1 and A.key7 = 1 or A.key8=1)
+and (B.key1 = 1 and B.key2 = 1 and B.key3 = 1 and B.key4=1 and B.key5=1 and B.key6=1 and B.key7 = 1 or B.key8=1);
+max(A.key1 + B.key1 + A.key2 + B.key2 + A.key3 + B.key3 + A.key4 + B.key4 + A.key5 + B.key5)
+8186
+set join_buffer_size= @save_join_buffer_size;
+drop table t0, t1, t2, t3, t4;
+CREATE TABLE t1 (
+cola char(3) not null, colb char(3) not null, filler char(200),
+key(cola), key(colb)
+);
+INSERT INTO t1 VALUES ('foo','bar', 'ZZ'),('fuz','baz', 'ZZ');
+OPTIMIZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 optimize status OK
+select count(*) from t1;
+count(*)
+8704
+explain select * from t1 WHERE cola = 'foo' AND colb = 'bar';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge cola,colb cola,colb 3,3 NULL # Using intersect(cola,colb); Using where
+explain select * from t1 force index(cola,colb) WHERE cola = 'foo' AND colb = 'bar';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge cola,colb cola,colb 3,3 NULL # Using intersect(cola,colb); Using where
+drop table t1;
+CREATE TABLE t1(a INT);
+INSERT INTO t1 VALUES(1);
+CREATE TABLE t2(a INT, b INT, dummy CHAR(16) DEFAULT '', KEY(a), KEY(b));
+INSERT INTO t2(a,b) VALUES
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),(0,0),
+(1,2);
+LOCK TABLES t1 WRITE, t2 WRITE;
+INSERT INTO t2(a,b) VALUES(1,2);
+SELECT t2.a FROM t1,t2 WHERE t2.b=2 AND t2.a=1;
+a
+1
+1
+UNLOCK TABLES;
+DROP TABLE t1, t2;
+CREATE TABLE `t1` (
+`a` int(11) DEFAULT NULL,
+`filler` char(200) DEFAULT NULL,
+`b` int(11) DEFAULT NULL,
+KEY `a` (`a`),
+KEY `b` (`b`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1;
+insert into t1 values
+(0, 'filler', 0), (1, 'filler', 1), (2, 'filler', 2), (3, 'filler', 3),
+(4, 'filler', 4), (5, 'filler', 5), (6, 'filler', 6), (7, 'filler', 7),
+(8, 'filler', 8), (9, 'filler', 9), (0, 'filler', 0), (1, 'filler', 1),
+(2, 'filler', 2), (3, 'filler', 3), (4, 'filler', 4), (5, 'filler', 5),
+(6, 'filler', 6), (7, 'filler', 7), (8, 'filler', 8), (9, 'filler', 9),
+(10, 'filler', 10), (11, 'filler', 11), (12, 'filler', 12), (13, 'filler', 13),
+(14, 'filler', 14), (15, 'filler', 15), (16, 'filler', 16), (17, 'filler', 17),
+(18, 'filler', 18), (19, 'filler', 19), (4, '5 ', 0), (5, '4 ', 0),
+(4, '4 ', 0), (4, 'qq ', 5), (5, 'qq ', 4), (4, 'zz ', 4);
+create table t2(
+`a` int(11) DEFAULT NULL,
+`filler` char(200) DEFAULT NULL,
+`b` int(11) DEFAULT NULL,
+KEY USING BTREE (`a`),
+KEY USING BTREE (`b`)
+) ENGINE=MEMORY DEFAULT CHARSET=latin1;
+insert into t2 select * from t1;
+must use sort-union rather than union:
+explain select * from t1 where a=4 or b=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge a,b a,b 5,5 NULL # Using sort_union(a,b); Using where
+select * from t1 where a=4 or b=4;
+a filler b
+4 4 0
+4 5 0
+4 filler 4
+4 filler 4
+4 qq 5
+4 zz 4
+5 qq 4
+select * from t1 ignore index(a,b) where a=4 or b=4;
+a filler b
+4 4 0
+4 5 0
+4 filler 4
+4 filler 4
+4 qq 5
+4 zz 4
+5 qq 4
+must use union, not sort-union:
+explain select * from t2 where a=4 or b=4;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 index_merge a,b a,b 5,5 NULL # Using union(a,b); Using where
+select * from t2 where a=4 or b=4;
+a filler b
+4 4 0
+4 5 0
+4 filler 4
+4 filler 4
+4 qq 5
+4 zz 4
+5 qq 4
+drop table t1, t2;
+CREATE TABLE t1 (a varchar(8), b set('a','b','c','d','e','f','g','h'),
+KEY b(b), KEY a(a));
+INSERT INTO t1 VALUES ('y',''), ('z','');
+SELECT b,a from t1 WHERE (b!='c' AND b!='f' && b!='h') OR
+(a='pure-S') OR (a='DE80337a') OR (a='DE80799');
+b a
+ y
+ z
+DROP TABLE t1;
+#
+# BUG#40974: Incorrect query results when using clause evaluated using range check
+#
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1 (a int);
+insert into t1 values (1),(2);
+create table t2(a int, b int);
+insert into t2 values (1,1), (2, 1000);
+create table t3 (a int, b int, filler char(100), key(a), key(b));
+insert into t3 select 1000, 1000,'filler' from t0 A, t0 B, t0 C;
+insert into t3 values (1,1,'data');
+insert into t3 values (1,1,'data');
+The plan should be ALL/ALL/ALL(Range checked for each record (index map: 0x3)
+explain select * from t1
+where exists (select 1 from t2, t3
+where t2.a=t1.a and (t3.a=t2.b or t3.b=t2.b or t3.b=t2.b+1));
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL # Using where
+2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL # Using where
+2 DEPENDENT SUBQUERY t3 ALL a,b NULL NULL NULL # Range checked for each record (index map: 0x3)
+select * from t1
+where exists (select 1 from t2, t3
+where t2.a=t1.a and (t3.a=t2.b or t3.b=t2.b or t3.b=t2.b+1));
+a
+1
+2
+drop table t0, t1, t2, t3;
+#
+# BUG#44810: index merge and order by with low sort_buffer_size
+# crashes server!
+#
+CREATE TABLE t1(a VARCHAR(128),b VARCHAR(128),KEY(A),KEY(B));
+INSERT INTO t1 VALUES (REPEAT('a',128),REPEAT('b',128));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+SET SESSION sort_buffer_size=1;
+Warnings:
+Warning 1292 Truncated incorrect sort_buffer_size value: '1'
+EXPLAIN
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%'
+ORDER BY a,b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge a,b a,b 131,131 NULL # Using sort_union(a,b); Using where; Using filesort
+SELECT * FROM t1 FORCE INDEX(a,b) WHERE a LIKE 'a%' OR b LIKE 'b%'
+ORDER BY a,b;
+SET SESSION sort_buffer_size=DEFAULT;
+DROP TABLE t1;
+End of 5.0 tests
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- ROR-index_merge tests -----------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t0,t1,t2;
+create table t1
+(
+/* Field names reflect value(rowid) distribution, st=STairs, swt= SaWTooth */
+st_a int not null default 0,
+swt1a int not null default 0,
+swt2a int not null default 0,
+st_b int not null default 0,
+swt1b int not null default 0,
+swt2b int not null default 0,
+/* fields/keys for row retrieval tests */
+key1 int,
+key2 int,
+key3 int,
+key4 int,
+/* make rows much bigger then keys */
+filler1 char (200),
+filler2 char (200),
+filler3 char (200),
+filler4 char (200),
+filler5 char (200),
+filler6 char (200),
+/* order of keys is important */
+key sta_swt12a(st_a,swt1a,swt2a),
+key sta_swt1a(st_a,swt1a),
+key sta_swt2a(st_a,swt2a),
+key sta_swt21a(st_a,swt2a,swt1a),
+key st_a(st_a),
+key stb_swt1a_2b(st_b,swt1b,swt2a),
+key stb_swt1b(st_b,swt1b),
+key st_b(st_b),
+key(key1),
+key(key2),
+key(key3),
+key(key4)
+) ;
+create table t0 as select * from t1;
+# Printing of many insert into t0 values (....) disabled.
+alter table t1 disable keys;
+Warnings:
+Note 1031 Table storage engine for 't1' doesn't have this option
+# Printing of many insert into t1 select .... from t0 disabled.
+# Printing of many insert into t1 (...) values (....) disabled.
+alter table t1 enable keys;
+Warnings:
+Note 1031 Table storage engine for 't1' doesn't have this option
+select count(*) from t1;
+count(*)
+64801
+explain select key1,key2 from t1 where key1=100 and key2=100;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge key1,key2 key2,key1 5,5 NULL 2 Using intersect(key2,key1); Using where; Using index
+select key1,key2 from t1 where key1=100 and key2=100;
+key1 key2
+100 100
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+100 100 100 100 key1-key2-key3-key4
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, -1, -1, 'key1-key2');
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 100, 100, 'key4-key3');
+select key1,key2,filler1 from t1 where key1=100 and key2=100;
+key1 key2 filler1
+100 100 key1-key2-key3-key4
+100 100 key1-key2
+select key1,key2 from t1 where key1=100 and key2=100;
+key1 key2
+100 100
+100 100
+select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4
+100 100 100 100
+100 100 -1 -1
+-1 -1 100 100
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+100 100 100 100 key1-key2-key3-key4
+100 100 -1 -1 key1-key2
+-1 -1 100 100 key4-key3
+select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100;
+key1 key2 key3
+100 100 100
+insert into t1 (key1,key2,key3,key4,filler1) values (101,101,101,101, 'key1234-101');
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101;
+key1 key2 key3 key4 filler1
+100 100 100 100 key1-key2-key3-key4
+100 100 -1 -1 key1-key2
+101 101 101 101 key1234-101
+select key1,key2, filler1 from t1 where key1=100 and key2=100;
+key1 key2 filler1
+100 100 key1-key2-key3-key4
+100 100 key1-key2
+update t1 set filler1='to be deleted' where key1=100 and key2=100;
+update t1 set key1=200,key2=200 where key1=100 and key2=100;
+delete from t1 where key1=200 and key2=200;
+select key1,key2,filler1 from t1 where key2=100 and key2=200;
+key1 key2 filler1
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+-1 -1 100 100 key4-key3
+delete from t1 where key3=100 and key4=100;
+select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100;
+key1 key2 key3 key4 filler1
+select key1,key2 from t1 where key1=100 and key2=100;
+key1 key2
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-1');
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-2');
+insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-3');
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+key1 key2 key3 key4 filler1
+100 100 200 200 key1-key2-key3-key4-1
+100 100 200 200 key1-key2-key3-key4-2
+100 100 200 200 key1-key2-key3-key4-3
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, -1, 200,'key4');
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+key1 key2 key3 key4 filler1
+100 100 200 200 key1-key2-key3-key4-1
+100 100 200 200 key1-key2-key3-key4-2
+100 100 200 200 key1-key2-key3-key4-3
+-1 -1 -1 200 key4
+insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 200, -1,'key3');
+select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200;
+key1 key2 key3 key4 filler1
+100 100 200 200 key1-key2-key3-key4-1
+100 100 200 200 key1-key2-key3-key4-2
+100 100 200 200 key1-key2-key3-key4-3
+-1 -1 -1 200 key4
+-1 -1 200 -1 key3
+drop table t0,t1;
+create table t2 (
+a char(10),
+b char(10),
+filler1 char(255),
+filler2 char(255),
+key(a(5)),
+key(b(5))
+);
+select count(a) from t2 where a='BBBBBBBB';
+count(a)
+4
+select count(a) from t2 where b='BBBBBBBB';
+count(a)
+4
+expla_or_bin select count(a_or_b) from t2 where a_or_b='AAAAAAAA' a_or_bnd a_or_b='AAAAAAAA';
+id select_type ta_or_ba_or_ble type possia_or_ble_keys key key_len ref rows Extra_or_b
+1 SIMPLE t2 ref a_or_b,a_or_b a_or_b 6 const 1 Using where
+select count(a) from t2 where a='AAAAAAAA' and b='AAAAAAAA';
+count(a)
+4
+select count(a) from t2 ignore index(a,b) where a='AAAAAAAA' and b='AAAAAAAA';
+count(a)
+4
+insert into t2 values ('ab', 'ab', 'uh', 'oh');
+explain select a from t2 where a='ab';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ref a a 6 const 1 Using where
+drop table t2;
+CREATE TABLE t1(c1 INT, c2 INT DEFAULT 0, c3 CHAR(255) DEFAULT '',
+KEY(c1), KEY(c2), KEY(c3));
+INSERT INTO t1(c1) VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),
+(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0),(0);
+INSERT INTO t1 VALUES(0,0,0);
+CREATE TABLE t2(c1 int);
+INSERT INTO t2 VALUES(1);
+DELETE t1 FROM t1,t2 WHERE t1.c1=0 AND t1.c2=0;
+SELECT * FROM t1;
+c1 c2 c3
+DROP TABLE t1,t2;
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- Index merge test 2 -------------------------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t1,t2;
+create table t1
+(
+key1 int not null,
+key2 int not null,
+INDEX i1(key1),
+INDEX i2(key2)
+);
+explain select * from t1 where key1 < 5 or key2 > 197;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge i1,i2 i1,i2 4,4 NULL 2 Using sort_union(i1,i2); Using where
+select * from t1 where key1 < 5 or key2 > 197;
+key1 key2
+0 200
+1 199
+2 198
+3 197
+4 196
+explain select * from t1 where key1 < 3 or key2 > 195;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge i1,i2 i1,i2 4,4 NULL 2 Using sort_union(i1,i2); Using where
+select * from t1 where key1 < 3 or key2 > 195;
+key1 key2
+0 200
+1 199
+2 198
+3 197
+4 196
+alter table t1 add str1 char (255) not null,
+add zeroval int not null default 0,
+add str2 char (255) not null,
+add str3 char (255) not null;
+update t1 set str1='aaa', str2='bbb', str3=concat(key2, '-', key1 div 2, '_' ,if(key1 mod 2 = 0, 'a', 'A'));
+alter table t1 add primary key (str1, zeroval, str2, str3);
+explain select * from t1 where key1 < 5 or key2 > 197;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL i1,i2 NULL NULL NULL 200 Using where
+select * from t1 where key1 < 5 or key2 > 197;
+key1 key2 str1 zeroval str2 str3
+4 196 aaa 0 bbb 196-2_a
+3 197 aaa 0 bbb 197-1_A
+2 198 aaa 0 bbb 198-1_a
+1 199 aaa 0 bbb 199-0_A
+0 200 aaa 0 bbb 200-0_a
+explain select * from t1 where key1 < 3 or key2 > 195;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL i1,i2 NULL NULL NULL 200 Using where
+select * from t1 where key1 < 3 or key2 > 195;
+key1 key2 str1 zeroval str2 str3
+4 196 aaa 0 bbb 196-2_a
+3 197 aaa 0 bbb 197-1_A
+2 198 aaa 0 bbb 198-1_a
+1 199 aaa 0 bbb 199-0_A
+0 200 aaa 0 bbb 200-0_a
+drop table t1;
+create table t1 (
+pk integer not null auto_increment primary key,
+key1 integer,
+key2 integer not null,
+filler char (200),
+index (key1),
+index (key2)
+);
+show warnings;
+Level Code Message
+explain select pk from t1 where key1 = 1 and key2 = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,key2 key1 5 const 1 Using where
+select pk from t1 where key2 = 1 and key1 = 1;
+pk
+26
+27
+select pk from t1 ignore index(key1,key2) where key2 = 1 and key1 = 1;
+pk
+26
+27
+drop table t1;
+create table t1 (
+pk int primary key auto_increment,
+key1a int,
+key2a int,
+key1b int,
+key2b int,
+dummy1 int,
+dummy2 int,
+dummy3 int,
+dummy4 int,
+key3a int,
+key3b int,
+filler1 char (200),
+index i1(key1a, key1b),
+index i2(key2a, key2b),
+index i3(key3a, key3b)
+);
+create table t2 (a int);
+insert into t2 values (0),(1),(2),(3),(4),(NULL);
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+select A.a, B.a, C.a, D.a, C.a, D.a from t2 A,t2 B,t2 C, t2 D;
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+select key1a, key1b, key2a, key2b, key3a, key3b from t1;
+insert into t1 (key1a, key1b, key2a, key2b, key3a, key3b)
+select key1a, key1b, key2a, key2b, key3a, key3b from t1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select count(*) from t1;
+count(*)
+5184
+select count(*) from t1 where
+key1a = 2 and key1b is null and key2a = 2 and key2b is null;
+count(*)
+4
+select count(*) from t1 where
+key1a = 2 and key1b is null and key3a = 2 and key3b is null;
+count(*)
+4
+drop table t1,t2;
+create table t1 (
+id1 int,
+id2 date ,
+index idx2 (id1,id2),
+index idx1 (id2)
+);
+insert into t1 values(1,'20040101'), (2,'20040102');
+select * from t1 where id1 = 1 and id2= '20040101';
+id1 id2
+1 2004-01-01
+drop table t1;
+drop view if exists v1;
+CREATE TABLE t1 (
+`oid` int(11) unsigned NOT NULL auto_increment,
+`fk_bbk_niederlassung` int(11) unsigned NOT NULL,
+`fk_wochentag` int(11) unsigned NOT NULL,
+`uhrzeit_von` time NOT NULL COMMENT 'HH:MM',
+`uhrzeit_bis` time NOT NULL COMMENT 'HH:MM',
+`geloescht` tinyint(4) NOT NULL,
+`version` int(5) NOT NULL,
+PRIMARY KEY (`oid`),
+KEY `fk_bbk_niederlassung` (`fk_bbk_niederlassung`),
+KEY `fk_wochentag` (`fk_wochentag`),
+KEY `ix_version` (`version`)
+) DEFAULT CHARSET=latin1;
+insert into t1 values
+(1, 38, 1, '08:00:00', '13:00:00', 0, 1),
+(2, 38, 2, '08:00:00', '13:00:00', 0, 1),
+(3, 38, 3, '08:00:00', '13:00:00', 0, 1),
+(4, 38, 4, '08:00:00', '13:00:00', 0, 1),
+(5, 38, 5, '08:00:00', '13:00:00', 0, 1),
+(6, 38, 5, '08:00:00', '13:00:00', 1, 2),
+(7, 38, 3, '08:00:00', '13:00:00', 1, 2),
+(8, 38, 1, '08:00:00', '13:00:00', 1, 2),
+(9, 38, 2, '08:00:00', '13:00:00', 1, 2),
+(10, 38, 4, '08:00:00', '13:00:00', 1, 2),
+(11, 38, 1, '08:00:00', '13:00:00', 0, 3),
+(12, 38, 2, '08:00:00', '13:00:00', 0, 3),
+(13, 38, 3, '08:00:00', '13:00:00', 0, 3),
+(14, 38, 4, '08:00:00', '13:00:00', 0, 3),
+(15, 38, 5, '08:00:00', '13:00:00', 0, 3),
+(16, 38, 4, '08:00:00', '13:00:00', 0, 4),
+(17, 38, 5, '08:00:00', '13:00:00', 0, 4),
+(18, 38, 1, '08:00:00', '13:00:00', 0, 4),
+(19, 38, 2, '08:00:00', '13:00:00', 0, 4),
+(20, 38, 3, '08:00:00', '13:00:00', 0, 4),
+(21, 7, 1, '08:00:00', '13:00:00', 0, 1),
+(22, 7, 2, '08:00:00', '13:00:00', 0, 1),
+(23, 7, 3, '08:00:00', '13:00:00', 0, 1),
+(24, 7, 4, '08:00:00', '13:00:00', 0, 1),
+(25, 7, 5, '08:00:00', '13:00:00', 0, 1);
+create view v1 as
+select
+zeit1.oid AS oid,
+zeit1.fk_bbk_niederlassung AS fk_bbk_niederlassung,
+zeit1.fk_wochentag AS fk_wochentag,
+zeit1.uhrzeit_von AS uhrzeit_von,
+zeit1.uhrzeit_bis AS uhrzeit_bis,
+zeit1.geloescht AS geloescht,
+zeit1.version AS version
+from
+t1 zeit1
+where
+(zeit1.version =
+(select max(zeit2.version) AS `max(version)`
+ from t1 zeit2
+where
+((zeit1.fk_bbk_niederlassung = zeit2.fk_bbk_niederlassung) and
+(zeit1.fk_wochentag = zeit2.fk_wochentag) and
+(zeit1.uhrzeit_von = zeit2.uhrzeit_von) and
+(zeit1.uhrzeit_bis = zeit2.uhrzeit_bis)
+)
+)
+)
+and (zeit1.geloescht = 0);
+select * from v1 where oid = 21;
+oid fk_bbk_niederlassung fk_wochentag uhrzeit_von uhrzeit_bis geloescht version
+21 7 1 08:00:00 13:00:00 0 1
+drop view v1;
+drop table t1;
+CREATE TABLE t1(
+t_cpac varchar(2) NOT NULL,
+t_vers varchar(4) NOT NULL,
+t_rele varchar(2) NOT NULL,
+t_cust varchar(4) NOT NULL,
+filler1 char(250) default NULL,
+filler2 char(250) default NULL,
+PRIMARY KEY (t_cpac,t_vers,t_rele,t_cust),
+UNIQUE KEY IX_4 (t_cust,t_cpac,t_vers,t_rele),
+KEY IX_5 (t_vers,t_rele,t_cust)
+);
+insert into t1 values
+('tm','2.5 ','a ',' ','',''), ('tm','2.5U','a ','stnd','',''),
+('da','3.3 ','b ',' ','',''), ('da','3.3U','b ','stnd','',''),
+('tl','7.6 ','a ',' ','',''), ('tt','7.6 ','a ',' ','',''),
+('bc','B61 ','a ',' ','',''), ('bp','B61 ','a ',' ','',''),
+('ca','B61 ','a ',' ','',''), ('ci','B61 ','a ',' ','',''),
+('cp','B61 ','a ',' ','',''), ('dm','B61 ','a ',' ','',''),
+('ec','B61 ','a ',' ','',''), ('ed','B61 ','a ',' ','',''),
+('fm','B61 ','a ',' ','',''), ('nt','B61 ','a ',' ','',''),
+('qm','B61 ','a ',' ','',''), ('tc','B61 ','a ',' ','',''),
+('td','B61 ','a ',' ','',''), ('tf','B61 ','a ',' ','',''),
+('tg','B61 ','a ',' ','',''), ('ti','B61 ','a ',' ','',''),
+('tp','B61 ','a ',' ','',''), ('ts','B61 ','a ',' ','',''),
+('wh','B61 ','a ',' ','',''), ('bc','B61U','a ','stnd','',''),
+('bp','B61U','a ','stnd','',''), ('ca','B61U','a ','stnd','',''),
+('ci','B61U','a ','stnd','',''), ('cp','B61U','a ','stnd','',''),
+('dm','B61U','a ','stnd','',''), ('ec','B61U','a ','stnd','',''),
+('fm','B61U','a ','stnd','',''), ('nt','B61U','a ','stnd','',''),
+('qm','B61U','a ','stnd','',''), ('tc','B61U','a ','stnd','',''),
+('td','B61U','a ','stnd','',''), ('tf','B61U','a ','stnd','',''),
+('tg','B61U','a ','stnd','',''), ('ti','B61U','a ','stnd','',''),
+('tp','B61U','a ','stnd','',''), ('ts','B61U','a ','stnd','',''),
+('wh','B61U','a ','stnd','','');
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `t_cpac` varchar(2) NOT NULL,
+ `t_vers` varchar(4) NOT NULL,
+ `t_rele` varchar(2) NOT NULL,
+ `t_cust` varchar(4) NOT NULL,
+ `filler1` char(250) DEFAULT NULL,
+ `filler2` char(250) DEFAULT NULL,
+ PRIMARY KEY (`t_cpac`,`t_vers`,`t_rele`,`t_cust`),
+ UNIQUE KEY `IX_4` (`t_cust`,`t_cpac`,`t_vers`,`t_rele`),
+ KEY `IX_5` (`t_vers`,`t_rele`,`t_cust`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+select t_vers,t_rele,t_cust,filler1 from t1 where t_vers = '7.6';
+t_vers t_rele t_cust filler1
+7.6 a
+7.6 a
+select t_vers,t_rele,t_cust,filler1 from t1 where t_vers = '7.6'
+ and t_rele='a' and t_cust = ' ';
+t_vers t_rele t_cust filler1
+7.6 a
+7.6 a
+drop table t1;
+create table t1 (
+pk int(11) not null auto_increment,
+a int(11) not null default '0',
+b int(11) not null default '0',
+c int(11) not null default '0',
+filler1 datetime, filler2 varchar(15),
+filler3 longtext,
+kp1 varchar(4), kp2 varchar(7),
+kp3 varchar(2), kp4 varchar(4),
+kp5 varchar(7),
+filler4 char(1),
+primary key (pk),
+key idx1(a,b,c),
+key idx2(c),
+key idx3(kp1,kp2,kp3,kp4,kp5)
+) default charset=latin1;
+set @fill=NULL;
+SELECT COUNT(*) FROM t1 WHERE b = 0 AND a = 0 AND c = 13286427 AND
+kp1='279' AND kp2='ELM0678' AND kp3='6' AND kp4='10' AND kp5 = 'R ';
+COUNT(*)
+1
+drop table t1;
+create table t1
+(
+key1 int not null,
+key2 int not null default 0,
+key3 int not null default 0
+);
+insert into t1(key1) values (1),(2),(3),(4),(5),(6),(7),(8);
+set @d=8;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+insert into t1 (key1) select key1+@d from t1;
+set @d=@d*2;
+alter table t1 add index i2(key2);
+alter table t1 add index i3(key3);
+update t1 set key2=key1,key3=key1;
+select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
+key1 key2 key3
+31 31 31
+32 32 32
+33 33 33
+34 34 34
+35 35 35
+36 36 36
+37 37 37
+38 38 38
+39 39 39
+drop table t1;
+#
+# Bug#56423: Different count with SELECT and CREATE SELECT queries
+#
+CREATE TABLE t1 (
+a INT,
+b INT,
+c INT,
+d INT,
+PRIMARY KEY (a),
+KEY (c),
+KEY bd (b,d)
+);
+INSERT INTO t1 VALUES
+(1, 0, 1, 0),
+(2, 1, 1, 1),
+(3, 1, 1, 1),
+(4, 0, 1, 1);
+EXPLAIN
+SELECT a
+FROM t1
+WHERE c = 1 AND b = 1 AND d = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref c,bd c 5 const 1 Using where
+CREATE TABLE t2 ( a INT )
+SELECT a
+FROM t1
+WHERE c = 1 AND b = 1 AND d = 1;
+SELECT * FROM t2;
+a
+2
+3
+DROP TABLE t1, t2;
+CREATE TABLE t1( a INT, b INT, KEY(a), KEY(b) );
+INSERT INTO t1 VALUES (1, 2), (1, 2), (1, 2), (1, 2);
+SELECT * FROM t1 FORCE INDEX(a, b) WHERE a = 1 AND b = 2;
+a b
+1 2
+1 2
+1 2
+1 2
+DROP TABLE t1;
+# Code coverage of fix.
+CREATE TABLE t1 ( a INT NOT NULL AUTO_INCREMENT PRIMARY KEY, b INT);
+INSERT INTO t1 (b) VALUES (1);
+UPDATE t1 SET b = 2 WHERE a = 1;
+SELECT * FROM t1;
+a b
+1 2
+CREATE TABLE t2 ( a INT NOT NULL AUTO_INCREMENT PRIMARY KEY, b VARCHAR(1) );
+INSERT INTO t2 (b) VALUES ('a');
+UPDATE t2 SET b = 'b' WHERE a = 1;
+SELECT * FROM t2;
+a b
+1 b
+DROP TABLE t1, t2;
+#
+# BUG#13970015: ASSERT `MIN_ENDP || MAX_ENDP' FAILED IN
+# HANDLER::MULTI_RANGE_READ_INFO_CONST
+#
+CREATE TABLE t1 (
+pk INT NOT NULL,
+col_int_key INT NOT NULL,
+col_varchar_key VARCHAR(1) NOT NULL,
+PRIMARY KEY (pk),
+KEY col_int_key (col_int_key),
+KEY col_varchar_key (col_varchar_key,col_int_key)
+);
+INSERT INTO t1 VALUES (1,1,'a'), (2,2,'b');
+EXPLAIN
+SELECT col_int_key
+FROM t1
+WHERE col_varchar_key >= 'l' OR
+(((pk BETWEEN 141 AND 141) OR col_varchar_key <> 'l')
+AND ((pk BETWEEN 141 AND 141) OR (col_int_key > 141)));
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index PRIMARY,col_int_key,col_varchar_key col_varchar_key 7 NULL 2 Using where; Using index
+SELECT col_int_key
+FROM t1
+WHERE col_varchar_key >= 'l' OR
+(((pk BETWEEN 141 AND 141) OR col_varchar_key <> 'l')
+AND ((pk BETWEEN 141 AND 141) OR (col_int_key > 141)));
+col_int_key
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- 2-sweeps read Index merge test 2 -------------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t1;
+create table t1 (
+pk int primary key,
+key1 int,
+key2 int,
+filler char(200),
+filler2 char(200),
+index(key1),
+index(key2)
+);
+select * from t1 where (key1 >= 2 and key1 <= 10) or (pk >= 4 and pk <=8 );
+pk key1 key2 filler filler2
+10 10 10 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+5 5 5 filler-data filler-data-2
+6 6 6 filler-data filler-data-2
+7 7 7 filler-data filler-data-2
+8 8 8 filler-data filler-data-2
+9 9 9 filler-data filler-data-2
+set @maxv=1000;
+select * from t1 where
+(pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+or key1=18 or key1=60;
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+18 18 18 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+60 60 60 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+select * from t1 where
+(pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+or key1 < 3 or key1 > @maxv-11;
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+990 990 990 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+select * from t1 where
+(pk < 5) or (pk > 10 and pk < 15) or (pk >= 50 and pk < 55 ) or (pk > @maxv-10)
+or
+(key1 < 5) or (key1 > 10 and key1 < 15) or (key1 >= 50 and key1 < 55 ) or (key1 > @maxv-10);
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+select * from t1 where
+(pk > 10 and pk < 15) or (pk >= 50 and pk < 55 )
+or
+(key1 < 5) or (key1 > @maxv-10);
+pk key1 key2 filler filler2
+1 1 1 filler-data filler-data-2
+1000 1000 1000 filler-data filler-data-2
+11 11 11 filler-data filler-data-2
+12 12 12 filler-data filler-data-2
+13 13 13 filler-data filler-data-2
+14 14 14 filler-data filler-data-2
+2 2 2 filler-data filler-data-2
+3 3 3 filler-data filler-data-2
+4 4 4 filler-data filler-data-2
+50 50 50 filler-data filler-data-2
+51 51 51 filler-data filler-data-2
+52 52 52 filler-data filler-data-2
+53 53 53 filler-data filler-data-2
+54 54 54 filler-data filler-data-2
+991 991 991 filler-data filler-data-2
+992 992 992 filler-data filler-data-2
+993 993 993 filler-data filler-data-2
+994 994 994 filler-data filler-data-2
+995 995 995 filler-data filler-data-2
+996 996 996 filler-data filler-data-2
+997 997 997 filler-data filler-data-2
+998 998 998 filler-data filler-data-2
+999 999 999 filler-data filler-data-2
+drop table t1;
+set global rocksdb_force_flush_memtable_now=1;
+#---------------- Clustered PK ROR-index_merge tests -----------------------------
+SET SESSION DEFAULT_STORAGE_ENGINE = RocksDB;
+drop table if exists t1;
+create table t1
+(
+pk1 int not null,
+pk2 int not null,
+key1 int not null,
+key2 int not null,
+pktail1ok int not null,
+pktail2ok int not null,
+pktail3bad int not null,
+pktail4bad int not null,
+pktail5bad int not null,
+pk2copy int not null,
+badkey int not null,
+filler1 char (200),
+filler2 char (200),
+key (key1),
+key (key2),
+/* keys with tails from CPK members */
+key (pktail1ok, pk1),
+key (pktail2ok, pk1, pk2),
+key (pktail3bad, pk2, pk1),
+key (pktail4bad, pk1, pk2copy),
+key (pktail5bad, pk1, pk2, pk2copy),
+primary key (pk1, pk2)
+);
+explain select * from t1 where pk1 = 1 and pk2 < 80 and key1=0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref PRIMARY,key1 key1 8 const,const ROWS Using index condition
+select * from t1 where pk1 = 1 and pk2 < 80 and key1=0;
+pk1 pk2 key1 key2 pktail1ok pktail2ok pktail3bad pktail4bad pktail5bad pk2copy badkey filler1 filler2
+1 10 0 0 0 0 0 0 0 10 0 filler-data-10 filler2
+1 11 0 0 0 0 0 0 0 11 0 filler-data-11 filler2
+1 12 0 0 0 0 0 0 0 12 0 filler-data-12 filler2
+1 13 0 0 0 0 0 0 0 13 0 filler-data-13 filler2
+1 14 0 0 0 0 0 0 0 14 0 filler-data-14 filler2
+1 15 0 0 0 0 0 0 0 15 0 filler-data-15 filler2
+1 16 0 0 0 0 0 0 0 16 0 filler-data-16 filler2
+1 17 0 0 0 0 0 0 0 17 0 filler-data-17 filler2
+1 18 0 0 0 0 0 0 0 18 0 filler-data-18 filler2
+1 19 0 0 0 0 0 0 0 19 0 filler-data-19 filler2
+explain select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,key2 key1 4 const 1 Using index condition; Using where
+select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1;
+pk1 pk2
+95 50
+95 51
+95 52
+95 53
+95 54
+95 55
+95 56
+95 57
+95 58
+95 59
+explain select * from t1 where badkey=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1 key1 4 const ROWS Using where
+explain select * from t1 where pk1 < 7500 and key1 = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge PRIMARY,key1 key1,PRIMARY 8,4 NULL ROWS Using intersect(key1,PRIMARY); Using where
+explain select * from t1 where pktail1ok=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail1ok key1 4 const 1 Using where
+explain select * from t1 where pktail2ok=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail2ok key1 4 const 1 Using where
+explain select * from t1 where (pktail2ok=1 and pk1< 50000) or key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index_merge PRIMARY,key1,pktail2ok pktail2ok,key1 8,4 NULL ROWS Using sort_union(pktail2ok,key1); Using where
+explain select * from t1 where pktail3bad=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail3bad EITHER_KEY 4 const ROWS Using where
+explain select * from t1 where pktail4bad=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail4bad key1 4 const ROWS Using where
+explain select * from t1 where pktail5bad=1 and key1=10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,pktail5bad key1 4 const ROWS Using where
+explain select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref key1,key2 key1 4 const 1 Using where
+select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10;
+pk1 pk2 key1 key2
+95 50 10 10
+95 51 10 10
+95 52 10 10
+95 53 10 10
+95 54 10 10
+95 55 10 10
+95 56 10 10
+95 57 10 10
+95 58 10 10
+95 59 10 10
+drop table t1;
+create table t1
+(
+RUNID varchar(22),
+SUBMITNR varchar(5),
+ORDERNR char(1),
+PROGRAMM varchar(8),
+TESTID varchar(4),
+UCCHECK char(1),
+ETEXT varchar(80),
+ETEXT_TYPE char(1),
+INFO char(1),
+SEVERITY tinyint(3),
+TADIRFLAG char(1),
+PRIMARY KEY (RUNID,SUBMITNR,ORDERNR,PROGRAMM,TESTID,UCCHECK),
+KEY `TVERM~KEY` (PROGRAMM,TESTID,UCCHECK)
+) DEFAULT CHARSET=latin1;
+update t1 set `ETEXT` = '', `ETEXT_TYPE`='', `INFO`='', `SEVERITY`='', `TADIRFLAG`=''
+WHERE
+`RUNID`= '' AND `SUBMITNR`= '' AND `ORDERNR`='' AND `PROGRAMM`='' AND
+`TESTID`='' AND `UCCHECK`='';
+drop table t1;
+#
+# Bug#50402 Optimizer producing wrong results when using Index Merge on InnoDB
+#
+CREATE TABLE t1 (f1 INT, PRIMARY KEY (f1));
+INSERT INTO t1 VALUES (2);
+CREATE TABLE t2 (f1 INT, f2 INT, f3 char(1),
+PRIMARY KEY (f1), KEY (f2), KEY (f3) );
+INSERT INTO t2 VALUES (1, 1, 'h'), (2, 3, 'h'), (3, 2, ''), (4, 2, '');
+SELECT t1.f1 FROM t1
+WHERE (SELECT COUNT(*) FROM t2 WHERE t2.f3 = 'h' AND t2.f2 = t1.f1) = 0 AND t1.f1 = 2;
+f1
+2
+EXPLAIN SELECT t1.f1 FROM t1
+WHERE (SELECT COUNT(*) FROM t2 WHERE t2.f3 = 'h' AND t2.f2 = t1.f1) = 0 AND t1.f1 = 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 const PRIMARY PRIMARY 4 const 1 Using index
+2 DEPENDENT SUBQUERY t2 ref f2,f3 f2 5 const 1 Using where
+DROP TABLE t1,t2;
+set global rocksdb_force_flush_memtable_now=1;
+#
+# Bug#11747423 32254: INDEX MERGE USED UNNECESSARILY
+#
+CREATE TABLE t1 (
+id INT NOT NULL PRIMARY KEY,
+id2 INT NOT NULL,
+id3 INT NOT NULL,
+KEY (id2),
+KEY (id3),
+KEY covering_index (id2,id3)
+) ENGINE=RocksDB;
+INSERT INTO t1 VALUES (0, 0, 0), (1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4), (5, 5, 5), (6, 6, 6), (7, 7, 7);
+INSERT INTO t1 SELECT id + 8, id2 + 8, id3 +8 FROM t1;
+INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1;
+EXPLAIN SELECT SQL_NO_CACHE count(*) FROM t1 WHERE id2=7 AND id3=0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ref id2,id3,covering_index id2 4 const 1 Using where
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
index 66481f81c67..1f4c88e5a33 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_primary.result
@@ -46,3 +46,26 @@ SHOW KEYS IN t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
t1 0 PRIMARY 1 b A # NULL NULL LSMTREE
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 767 bytes
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+ERROR 42000: Specified key was too long; max key length is 3072 bytes
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
index a604663954b..6ddb3d45695 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result
@@ -40,3 +40,30 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
index ae99badff14..08b17ffe8ee 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result
@@ -40,3 +40,30 @@ t1 0 PRIMARY 1 pk A # NULL NULL LSMTREE
t1 1 a 1 a A # NULL NULL YES LSMTREE simple index on a
ALTER TABLE t1 DROP KEY a;
DROP TABLE t1;
+set global rocksdb_large_prefix=0;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(1024),
+KEY (a(768))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 767 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a BLOB(4096),
+KEY (a(3073))
+) ENGINE=rocksdb;
+Warnings:
+Warning 1071 Specified key was too long; max key length is 3072 bytes
+DROP TABLE t1;
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
index f55662183ca..291effa832c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/information_schema.result
@@ -39,12 +39,10 @@ a int,
b int,
c int,
d int,
-e int,
PRIMARY KEY (a) COMMENT "cf_a",
KEY (b) COMMENT "cf_b",
KEY (c) COMMENT "cf_c",
-KEY (d) COMMENT "$per_index_cf",
-KEY (e) COMMENT "rev:cf_d") ENGINE=ROCKSDB;
+KEY (d) COMMENT "rev:cf_d") ENGINE=ROCKSDB;
select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where TYPE = 'CF_FLAGS';
TYPE NAME VALUE
CF_FLAGS 0 default [0]
@@ -52,8 +50,7 @@ CF_FLAGS 1 __system__ [0]
CF_FLAGS 2 cf_a [0]
CF_FLAGS 3 cf_b [0]
CF_FLAGS 4 cf_c [0]
-CF_FLAGS 5 test.t2.d [2]
-CF_FLAGS 6 rev:cf_d [1]
+CF_FLAGS 5 rev:cf_d [1]
CREATE TABLE t3 (a INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
insert into t3 (a) values (1), (2), (3);
SET @ORIG_ROCKSDB_PAUSE_BACKGROUND_WORK = @@GLOBAL.ROCKSDB_PAUSE_BACKGROUND_WORK;
@@ -69,7 +66,7 @@ SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
Variable_name Value
rocksdb_pause_background_work ON
DROP TABLE t3;
-cf_id:0,index_id:268
+cf_id:0,index_id:267
SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0;
SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK';
Variable_name Value
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue111.result b/storage/rocksdb/mysql-test/rocksdb/r/issue111.result
index e15519c3d7a..796a637e787 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/issue111.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue111.result
@@ -32,6 +32,6 @@ update t1 set col2=123456 where pk=0;
commit;
connection default;
update t1 set col2=col2+1 where col1 < 10 limit 5;
-ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
disconnect con2;
drop table t1, ten, one_k;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
new file mode 100644
index 00000000000..d4d211b9288
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result
@@ -0,0 +1,161 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+id INT,
+val1 INT,
+val2 INT,
+PRIMARY KEY (id)
+) ENGINE=rocksdb;
+INSERT INTO t1 VALUES(1,1,1),(2,1,2);
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 1 2
+UPDATE t1 SET val1=2 WHERE id=2;
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 2 2
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t1 VALUES(20,1,1),(30,30,30);
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 2 2
+20 1 1
+30 30 30
+UPDATE t1 SET val1=20, val2=20 WHERE id=20;
+SELECT * FROM t1;
+id val1 val2
+1 1 1
+2 2 2
+20 20 20
+30 30 30
+DELETE FROM t1 WHERE id=30;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+---SNAPSHOT, ACTIVE NUM sec
+MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
+SHOW ENGINE rocksdb TRANSACTION STATUS
+lock count 8, write count 4
+insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+ROLLBACK;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+START TRANSACTION;
+INSERT INTO t1 VALUES(40,40,40);
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+---SNAPSHOT, ACTIVE NUM sec
+MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
+SHOW ENGINE rocksdb TRANSACTION STATUS
+lock count 2, write count 1
+insert count 1, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+COMMIT;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+SET AUTOCOMMIT=1;
+DROP TABLE t1;
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t2 (
+id1 INT,
+id2 INT,
+value INT,
+PRIMARY KEY (id1),
+KEY (id2)
+) ENGINE=rocksdb;
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t2 VALUES(1,2,0),(10,20,30);
+UPDATE t2 SET value=3 WHERE id2=2;
+DELETE FROM t2 WHERE id1=10;
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+---SNAPSHOT, ACTIVE NUM sec
+MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
+SHOW ENGINE rocksdb TRANSACTION STATUS
+lock count 9, write count 7
+insert count 2, update count 1, delete count 1
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+ROLLBACK;
+SET AUTOCOMMIT=1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/issue255.result b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
new file mode 100644
index 00000000000..62875e378a4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/issue255.result
@@ -0,0 +1,21 @@
+CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+INSERT INTO t1 VALUES (5);
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 1 # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ('538647864786478647864');
+Warnings:
+Warning 1264 Out of range value for column 'pk' at row 1
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+INSERT INTO t1 VALUES ();
+ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
+SELECT * FROM t1;
+pk
+5
+9223372036854775807
+SHOW TABLE STATUS LIKE 't1';
+Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
+t1 ROCKSDB # Fixed 2 # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
new file mode 100644
index 00000000000..d0bfb05fd1b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result
@@ -0,0 +1,27 @@
+create table t (a int primary key) engine=rocksdb;
+begin;
+insert into t values (0);
+set @@rocksdb_lock_wait_timeout=1;
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+0
+begin;
+set @@rocksdb_lock_wait_timeout=1;
+begin;
+insert into t values(0);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+1
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+1
+insert into t values(0);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t.PRIMARY
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+2
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+ROW_LOCK_WAIT_TIMEOUTS
+2
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
new file mode 100644
index 00000000000..86ba6d923a8
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result
@@ -0,0 +1,37 @@
+DROP TABLE IF EXISTS T1;
+CREATE TABLE T1 (
+P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+KEY SK (S1(8), S2(8), S3(8), S4(8),
+S5(8), S6(8), S7(8), S8(8),
+S9(8), S10(8), S11(8), S12(8),
+S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+'5', '6', '7', '8',
+'9', '10', '11', '12',
+'13', '14', '15', '16',
+'17', '18', '19', '20');
+SELECT * FROM T1;
+P1 P2 P3 P4 S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16
+1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+# Not eligible for optimization, shorter than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+4 6
+include/assert.inc [Not eligible for optimization, shorter than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+'500000000', '600000000', '700000000', '800000000',
+'900000000', '100000000', '110000000', '120000000',
+'130000000', '140000000', '150000000', '160000000',
+'170000000', '180000000', '190000000', '200000000');
+# Not eligible for optimization, longer than prefix length.
+SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5';
+P4 S2
+include/assert.inc [Not eligible for optimization, longer than prefix length.: 0 rocksdb_covered_secondary_key_lookups]
+DROP TABLE T1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result b/storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result
new file mode 100644
index 00000000000..725b74e1291
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result
@@ -0,0 +1,397 @@
+create database linkdb;
+use linkdb;
+create table linktable (
+id1 bigint(20) unsigned NOT NULL DEFAULT '0',
+id2 bigint(20) unsigned NOT NULL DEFAULT '0',
+link_type bigint(20) unsigned NOT NULL DEFAULT '0',
+visibility tinyint(3) NOT NULL DEFAULT '0',
+data varchar(255) NOT NULL DEFAULT '',
+time bigint(20) unsigned NOT NULL DEFAULT '0',
+version int(11) unsigned NOT NULL DEFAULT '0',
+primary key (id1,id2,link_type) COMMENT 'cf_link_pk',
+KEY id1_type (id1,link_type,visibility,time,version,data)
+COMMENT 'rev:cf_link_id1_type') ENGINE=rocksdb DEFAULT COLLATE=latin1_bin;
+create table counttable (
+id bigint(20) unsigned NOT NULL DEFAULT '0',
+link_type bigint(20) unsigned NOT NULL DEFAULT '0',
+count int(10) unsigned NOT NULL DEFAULT '0',
+time bigint(20) unsigned NOT NULL DEFAULT '0',
+version bigint(20) unsigned NOT NULL DEFAULT '0',
+primary key (id,link_type) COMMENT 'cf_count_pk')
+ENGINE=rocksdb DEFAULT COLLATE=latin1_bin;
+create table nodetable (
+id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
+type int(10) unsigned NOT NULL,
+version bigint(20) unsigned NOT NULL,
+time int(10) unsigned NOT NULL,
+data mediumtext NOT NULL,
+primary key(id) COMMENT 'cf_node_pk')
+ENGINE=rocksdb DEFAULT COLLATE=latin1_bin;
+#
+# Test nodeGet function
+#
+create native procedure nodeGet soname "NP_EXAMPLE_LIB";
+%nodeGet 1;
+id type version time data
+1 1 1 1000 data
+%nodeGet 50 anything can go here;
+id type version time data
+50 1 1 50000 data
+%nodeGet 39;
+id type version time data
+39 1 1 39000 data
+%nodeGet 98;
+id type version time data
+98 1 1 98000 data
+%nodeGet 1000;
+id type version time data
+%nodeGet -1;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'nodeGet -1')
+%nodeGet asdf;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'nodeGet asdf')
+#
+# Test linkGetRange/linkGetId2s function
+#
+create native procedure linkGetRange soname "NP_EXAMPLE_LIB";
+create native procedure linkGetId2s soname "NP_EXAMPLE_LIB";
+%linkGetRange 1 1 1000 2000 0 1000;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%linkGetRange 1 2 1000 2000 0 1000;
+id1 id2 link_type visibility data time version
+%linkGetRange 1 1 5000 2000 0 1000;
+id1 id2 link_type visibility data time version
+%linkGetRange 1 2 1000 6000 0 5;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+%linkGetRange 1 2 1000 6000 0 2;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+%linkGetRange 1 2 1000 6000 2 2;
+id1 id2 link_type visibility data time version
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+%linkGetId2s 1 3 3 1 2 3;
+id1 id2 link_type visibility data time version
+1 1 3 1 data 3001 1
+1 2 3 1 data 3002 1
+1 3 3 1 data 3003 1
+%linkGetId2s 1 3 3 3 2 1;
+id1 id2 link_type visibility data time version
+1 3 3 1 data 3003 1
+1 2 3 1 data 3002 1
+1 1 3 1 data 3001 1
+%linkGetId2s 1 3 3 3 2 10;
+id1 id2 link_type visibility data time version
+1 3 3 1 data 3003 1
+1 2 3 1 data 3002 1
+%linkGetId2s 1 3 3 3 2 1 asdf;
+id1 id2 link_type visibility data time version
+1 3 3 1 data 3003 1
+1 2 3 1 data 3002 1
+1 1 3 1 data 3001 1
+%linkGetId2s 1 3 0;
+id1 id2 link_type visibility data time version
+%linkGetId2s 1 3 4 2;
+ERROR HY000: Incorrect arguments to native procedure. (query 'linkGetId2s 1 3 4 2')
+#
+# Test rangeQuery function
+#
+create native procedure rangeQuery soname "NP_EXAMPLE_LIB";
+%rangeQuery 1 0 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+%rangeQuery 1 0 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+%rangeQuery 1 1 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+%rangeQuery 1 1 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+%rangeQuery 0 0 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 0 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 1 0 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+%rangeQuery 0 1 1 4 id1 1 link_type 1 visibility 1 time 1001 4 id1 1 link_type 1 visibility 1 time 1005;
+id1 id2 link_type visibility data time version
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+%rangeQuery 1 0 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+1 1 2 1 data 2001 1
+1 2 2 1 data 2002 1
+1 3 2 1 data 2003 1
+1 4 2 1 data 2004 1
+1 5 2 1 data 2005 1
+%rangeQuery 1 0 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 1 1 1 data 1001 1
+1 2 1 1 data 1002 1
+1 3 1 1 data 1003 1
+1 4 1 1 data 1004 1
+1 5 1 1 data 1005 1
+%rangeQuery 1 1 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 1 2 1 data 2001 1
+1 2 2 1 data 2002 1
+1 3 2 1 data 2003 1
+1 4 2 1 data 2004 1
+1 5 2 1 data 2005 1
+%rangeQuery 1 1 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+%rangeQuery 0 0 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 0 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 1 0 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+1 5 2 1 data 2005 1
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+%rangeQuery 0 1 1 2 id1 1 link_type 1 2 id1 1 link_type 2;
+id1 id2 link_type visibility data time version
+%rangeQuery 0 0 0 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 0 1 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+%rangeQuery 0 1 0 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 4 2 1 data 2004 1
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+%rangeQuery 0 1 1 2 id1 1 link_type 1 4 id1 1 link_type 2 visibility 1 time 2004;
+id1 id2 link_type visibility data time version
+1 3 2 1 data 2003 1
+1 2 2 1 data 2002 1
+1 1 2 1 data 2001 1
+#
+# Test countGet function
+#
+create native procedure countGet soname "NP_EXAMPLE_LIB";
+%countGet 1 1;
+count
+2
+%countGet 10 1;
+count
+20
+%countGet 111 1;
+count
+%countGet 1 111;
+count
+%countGet -1 1 1;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'countGet -1 1 1')
+%countGet -1 1 2;
+ERROR HY000: Native procedure failed. (code: 7, msg: 'Invalid arguments: Conversion failed for field id.', query 'countGet -1 1 2')
+%countGet;
+ERROR HY000: Incorrect arguments to native procedure. (query 'countGet')
+#
+# Check that DDL locks are respected.
+#
+create native procedure sleepRange soname "NP_EXAMPLE_LIB";
+%sleepRange 1;
+set @start_lock_wait_timeout = @@session.lock_wait_timeout;
+set lock_wait_timeout = 1;
+drop table counttable;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on table metadata: linkdb.counttable
+set lock_wait_timeout = @start_lock_wait_timeout;
+count
+2
+4
+6
+8
+10
+12
+14
+16
+18
+20
+#
+# Check that queries can be killed.
+#
+%sleepRange 0;
+kill query ID;
+ERROR 70100: Query execution was interrupted
+#
+# Check that native procedures work properly with transactions.
+#
+use linkdb;
+set session transaction isolation level repeatable read;
+%countGet 1 1;
+count
+2
+begin;
+select count from counttable where id = 1 and link_type = 1;
+count
+2
+%countGet 1 1;
+count
+2
+# Before update
+%countGet 1 1;
+count
+2
+update counttable set count = count + 1 where id = 1 and link_type = 1;
+# After update
+%countGet 1 1;
+count
+3
+# Unchanged due to consistent reads
+%countGet 1 1;
+count
+2
+#
+# Check index reads on prefixed data.
+#
+alter table linktable drop index id1_type;
+alter table linktable
+add index id1_type (id1,link_type,visibility,time,version,data(1))
+COMMENT 'rev:cf_link_id1_type';
+%linkGetRange 1 1 1000 2000 0 1000;
+id1 id2 link_type visibility data time version
+1 5 1 1 data 1005 1
+1 4 1 1 data 1004 1
+1 3 1 1 data 1003 1
+1 2 1 1 data 1002 1
+1 1 1 1 data 1001 1
+#
+# Check correct error handling for various scenarios.
+#
+create native procedure invalidKey1 soname "NP_EXAMPLE_LIB";
+%invalidKey1;
+ERROR HY000: Native procedure failed. (code: 6, msg: 'Not found: ', query 'invalidKey1')
+create native procedure invalidOpen1 soname "NP_EXAMPLE_LIB";
+%invalidOpen1;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen1')
+create native procedure invalidOpen2 soname "NP_EXAMPLE_LIB";
+%invalidOpen2;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen2')
+create native procedure invalidOpen3 soname "NP_EXAMPLE_LIB";
+%invalidOpen3;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen3')
+create native procedure invalidOpen4 soname "NP_EXAMPLE_LIB";
+%invalidOpen4;
+ERROR HY000: Native procedure failed. (code: 5, msg: 'Cannot reinitialize: ', query 'invalidOpen4')
+%invalidProcedure;
+ERROR HY000: Unknown native procedure. 'invalidProcedure'
+create native procedure invalidProcedure soname "invalid.so";
+ERROR HY000: Can't open shared library
+create native procedure invalidProcedure soname "NP_EXAMPLE_LIB";
+ERROR HY000: Can't find symbol 'invalidProcedure' in library
+#
+# Check that our functions are reloaded after restart.
+#
+select * from mysql.native_proc order by name;
+name type dl lua
+countGet native np_example.so
+invalidKey1 native np_example.so
+invalidOpen1 native np_example.so
+invalidOpen2 native np_example.so
+invalidOpen3 native np_example.so
+invalidOpen4 native np_example.so
+linkGetId2s native np_example.so
+linkGetRange native np_example.so
+nodeGet native np_example.so
+rangeQuery native np_example.so
+sleepRange native np_example.so
+drop native procedure nodeGet;
+create native procedure nodeGet soname "NP_EXAMPLE_LIB";
+ERROR HY000: Native procedure 'nodeGet' exists.
+drop native procedure linkGetRange;
+drop native procedure linkGetId2s;
+drop native procedure countGet;
+drop native procedure sleepRange;
+drop native procedure rangeQuery;
+drop native procedure invalidKey1;
+drop native procedure invalidOpen1;
+drop native procedure invalidOpen2;
+drop native procedure invalidOpen3;
+drop native procedure invalidOpen4;
+%nodeGet 1;
+ERROR HY000: Unknown native procedure. 'nodeGet'
+#
+# Check that our functions are unloaded after restart.
+#
+select * from mysql.native_proc order by name;
+name type dl lua
+%nodeGet 1;
+ERROR HY000: Unknown native procedure. 'nodeGet'
+drop database linkdb;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result
new file mode 100644
index 00000000000..9c7d189e935
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result
@@ -0,0 +1,76 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (id1 BIGINT, id2 BIGINT, id3 BIGINT, id4 BIGINT, PRIMARY KEY (id1, id2, id3, id4) comment 'cf1') ENGINE=rocksdb collate latin1_bin;
+set global rocksdb_force_flush_memtable_now = 1;
+
+Original Prefix Extractor:
+
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+1
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+1
+
+Prefix Extractor (after override_cf_options set, should not be changed):
+
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+
+Restarting with new Prefix Extractor...
+
+
+Changed Prefix Extractor (after restart):
+
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+CF_NAME OPTION_TYPE VALUE
+__system__ PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+cf1 PREFIX_EXTRACTOR rocksdb.CappedPrefix.26
+default PREFIX_EXTRACTOR rocksdb.CappedPrefix.24
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+1
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+0
+set global rocksdb_force_flush_memtable_now = 1;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+1
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+1
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+COUNT(*)
+2
+UPDATE t1 SET id1=1,id2 = 30,id3 = 30 WHERE id4 >= 0 AND id4 <=10;
+set global rocksdb_force_flush_memtable_now = 1;
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+COUNT(*)
+3
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+COUNT(*)
+0
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+2
+set global rocksdb_compact_cf='cf1';
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=30 AND id3=30;
+COUNT(*)
+11
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+variable_value-@u
+1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
index e945e362f99..6a240d3126c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result
@@ -875,14 +875,13 @@ rocksdb_advise_random_on_open ON
rocksdb_allow_concurrent_memtable_write OFF
rocksdb_allow_mmap_reads OFF
rocksdb_allow_mmap_writes OFF
-rocksdb_background_sync OFF
-rocksdb_base_background_compactions 1
rocksdb_blind_delete_primary_key OFF
rocksdb_block_cache_size 536870912
rocksdb_block_restart_interval 16
rocksdb_block_size 4096
rocksdb_block_size_deviation 10
rocksdb_bulk_load OFF
+rocksdb_bulk_load_allow_unsorted OFF
rocksdb_bulk_load_size 1000
rocksdb_bytes_per_sync 0
rocksdb_cache_index_and_filter_blocks ON
@@ -895,38 +894,51 @@ rocksdb_compaction_sequential_deletes 0
rocksdb_compaction_sequential_deletes_count_sd OFF
rocksdb_compaction_sequential_deletes_file_size 0
rocksdb_compaction_sequential_deletes_window 0
+rocksdb_concurrent_prepare ON
rocksdb_create_checkpoint
rocksdb_create_if_missing ON
rocksdb_create_missing_column_families OFF
rocksdb_datadir ./.rocksdb
rocksdb_db_write_buffer_size 0
rocksdb_deadlock_detect OFF
+rocksdb_deadlock_detect_depth 50
rocksdb_debug_optimizer_no_zero_cardinality ON
+rocksdb_debug_ttl_ignore_pk OFF
+rocksdb_debug_ttl_read_filter_ts 0
+rocksdb_debug_ttl_rec_ts 0
+rocksdb_debug_ttl_snapshot_ts 0
rocksdb_default_cf_options
-rocksdb_delayed_write_rate 16777216
+rocksdb_delayed_write_rate 0
rocksdb_delete_obsolete_files_period_micros 21600000000
rocksdb_enable_2pc ON
rocksdb_enable_bulk_load_api ON
-rocksdb_enable_thread_tracking OFF
+rocksdb_enable_thread_tracking ON
+rocksdb_enable_ttl ON
+rocksdb_enable_ttl_read_filtering ON
rocksdb_enable_write_thread_adaptive_yield OFF
rocksdb_error_if_exists OFF
rocksdb_flush_log_at_trx_commit 0
rocksdb_flush_memtable_on_analyze ON
rocksdb_force_compute_memtable_stats ON
+rocksdb_force_compute_memtable_stats_cachetime 0
+rocksdb_force_flush_memtable_and_lzero_now OFF
rocksdb_force_flush_memtable_now OFF
rocksdb_force_index_records_in_range 0
rocksdb_hash_index_allow_collision ON
rocksdb_index_type kBinarySearch
rocksdb_info_log_level error_level
+rocksdb_io_write_timeout 0
rocksdb_is_fd_close_on_exec ON
rocksdb_keep_log_file_num 1000
+rocksdb_large_prefix OFF
rocksdb_lock_scanned_rows OFF
rocksdb_lock_wait_timeout 1
rocksdb_log_file_time_to_roll 0
rocksdb_manifest_preallocation_size 4194304
+rocksdb_manual_wal_flush ON
rocksdb_master_skip_tx_api OFF
-rocksdb_max_background_compactions 1
-rocksdb_max_background_flushes 1
+rocksdb_max_background_jobs 2
+rocksdb_max_latest_deadlocks 5
rocksdb_max_log_file_size 0
rocksdb_max_manifest_file_size 18446744073709551615
rocksdb_max_open_files -1
@@ -935,6 +947,7 @@ rocksdb_max_subcompactions 1
rocksdb_max_total_wal_size 0
rocksdb_merge_buf_size 67108864
rocksdb_merge_combine_read_size 1073741824
+rocksdb_merge_tmp_file_removal_delay_ms 0
rocksdb_new_table_reader_for_compaction_inputs OFF
rocksdb_no_block_cache OFF
rocksdb_override_cf_options
@@ -948,11 +961,14 @@ rocksdb_print_snapshot_conflict_queries OFF
rocksdb_rate_limiter_bytes_per_sec 0
rocksdb_read_free_rpl_tables
rocksdb_records_in_range 50
+rocksdb_reset_stats OFF
rocksdb_seconds_between_stat_computes 3600
rocksdb_signal_drop_index_thread OFF
+rocksdb_sim_cache_size 0
rocksdb_skip_bloom_filter_on_read OFF
rocksdb_skip_fill_cache OFF
rocksdb_skip_unique_check_tables .*
+rocksdb_sst_mgr_rate_bytes_per_sec 0
rocksdb_stats_dump_period_sec 600
rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
@@ -962,9 +978,11 @@ rocksdb_table_stats_sampling_pct 10
rocksdb_tmpdir
rocksdb_trace_sst_api OFF
rocksdb_unsafe_for_binlog OFF
+rocksdb_update_cf_options
rocksdb_use_adaptive_mutex OFF
+rocksdb_use_clock_cache OFF
+rocksdb_use_direct_io_for_flush_and_compaction OFF
rocksdb_use_direct_reads OFF
-rocksdb_use_direct_writes OFF
rocksdb_use_fsync OFF
rocksdb_validate_tables 1
rocksdb_verify_row_debug_checksums OFF
@@ -974,6 +992,7 @@ rocksdb_wal_recovery_mode 1
rocksdb_wal_size_limit_mb 0
rocksdb_wal_ttl_seconds 0
rocksdb_whole_key_filtering ON
+rocksdb_write_batch_max_bytes 0
rocksdb_write_disable_wal OFF
rocksdb_write_ignore_missing_column_families OFF
create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb;
@@ -1443,7 +1462,7 @@ Rocksdb_rows_inserted #
Rocksdb_rows_read #
Rocksdb_rows_updated #
Rocksdb_rows_deleted_blind #
-Rocksdb_system_rows_deleted #
+Rocksdb_rows_expired #
Rocksdb_system_rows_inserted #
Rocksdb_system_rows_read #
Rocksdb_system_rows_updated #
@@ -1453,6 +1472,11 @@ Rocksdb_block_cache_data_miss #
Rocksdb_block_cache_filter_hit #
Rocksdb_block_cache_filter_miss #
Rocksdb_block_cache_hit #
+Rocksdb_memtable_total #
+Rocksdb_memtable_unflushed #
+Rocksdb_queries_point #
+Rocksdb_queries_range #
+Rocksdb_covered_secondary_key_lookups #
Rocksdb_block_cache_index_hit #
Rocksdb_block_cache_index_miss #
Rocksdb_block_cache_miss #
@@ -1507,6 +1531,17 @@ Rocksdb_write_other #
Rocksdb_write_self #
Rocksdb_write_timedout #
Rocksdb_write_wal #
+Rocksdb_stall_l0_file_count_limit_slowdowns #
+Rocksdb_stall_locked_l0_file_count_limit_slowdowns #
+Rocksdb_stall_l0_file_count_limit_stops #
+Rocksdb_stall_locked_l0_file_count_limit_stops #
+Rocksdb_stall_pending_compaction_limit_stops #
+Rocksdb_stall_pending_compaction_limit_slowdowns #
+Rocksdb_stall_memtable_limit_stops #
+Rocksdb_stall_memtable_limit_slowdowns #
+Rocksdb_stall_total_stops #
+Rocksdb_stall_total_slowdowns #
+Rocksdb_stall_micros #
select VARIABLE_NAME from INFORMATION_SCHEMA.global_status where VARIABLE_NAME LIKE 'rocksdb%';
VARIABLE_NAME
ROCKSDB_ROWS_DELETED
@@ -1514,10 +1549,16 @@ ROCKSDB_ROWS_INSERTED
ROCKSDB_ROWS_READ
ROCKSDB_ROWS_UPDATED
ROCKSDB_ROWS_DELETED_BLIND
+ROCKSDB_ROWS_EXPIRED
ROCKSDB_SYSTEM_ROWS_DELETED
ROCKSDB_SYSTEM_ROWS_INSERTED
ROCKSDB_SYSTEM_ROWS_READ
ROCKSDB_SYSTEM_ROWS_UPDATED
+ROCKSDB_MEMTABLE_TOTAL
+ROCKSDB_MEMTABLE_UNFLUSHED
+ROCKSDB_QUERIES_POINT
+ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
@@ -1541,9 +1582,6 @@ ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
ROCKSDB_COMPACTION_KEY_DROP_USER
ROCKSDB_FLUSH_WRITE_BYTES
ROCKSDB_GETUPDATESSINCE_CALLS
-ROCKSDB_L0_NUM_FILES_STALL_MICROS
-ROCKSDB_L0_SLOWDOWN_MICROS
-ROCKSDB_MEMTABLE_COMPACTION_MICROS
ROCKSDB_MEMTABLE_HIT
ROCKSDB_MEMTABLE_MISS
ROCKSDB_NO_FILE_CLOSES
@@ -1571,6 +1609,17 @@ ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
ROCKSDB_RATE_LIMIT_DELAY_MILLIS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
+ROCKSDB_STALL_MICROS
ROCKSDB_WAL_BYTES
ROCKSDB_WAL_GROUP_SYNCS
ROCKSDB_WAL_SYNCED
@@ -1587,10 +1636,16 @@ ROCKSDB_ROWS_INSERTED
ROCKSDB_ROWS_READ
ROCKSDB_ROWS_UPDATED
ROCKSDB_ROWS_DELETED_BLIND
+ROCKSDB_ROWS_EXPIRED
ROCKSDB_SYSTEM_ROWS_DELETED
ROCKSDB_SYSTEM_ROWS_INSERTED
ROCKSDB_SYSTEM_ROWS_READ
ROCKSDB_SYSTEM_ROWS_UPDATED
+ROCKSDB_MEMTABLE_TOTAL
+ROCKSDB_MEMTABLE_UNFLUSHED
+ROCKSDB_QUERIES_POINT
+ROCKSDB_QUERIES_RANGE
+ROCKSDB_COVERED_SECONDARY_KEY_LOOKUPS
ROCKSDB_BLOCK_CACHE_ADD
ROCKSDB_BLOCK_CACHE_DATA_HIT
ROCKSDB_BLOCK_CACHE_DATA_MISS
@@ -1614,9 +1669,6 @@ ROCKSDB_COMPACTION_KEY_DROP_OBSOLETE
ROCKSDB_COMPACTION_KEY_DROP_USER
ROCKSDB_FLUSH_WRITE_BYTES
ROCKSDB_GETUPDATESSINCE_CALLS
-ROCKSDB_L0_NUM_FILES_STALL_MICROS
-ROCKSDB_L0_SLOWDOWN_MICROS
-ROCKSDB_MEMTABLE_COMPACTION_MICROS
ROCKSDB_MEMTABLE_HIT
ROCKSDB_MEMTABLE_MISS
ROCKSDB_NO_FILE_CLOSES
@@ -1644,6 +1696,17 @@ ROCKSDB_NUMBER_SUPERVERSION_CLEANUPS
ROCKSDB_NUMBER_SUPERVERSION_RELEASES
ROCKSDB_RATE_LIMIT_DELAY_MILLIS
ROCKSDB_SNAPSHOT_CONFLICT_ERRORS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_LOCKED_L0_FILE_COUNT_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_STOPS
+ROCKSDB_STALL_PENDING_COMPACTION_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_MEMTABLE_LIMIT_STOPS
+ROCKSDB_STALL_MEMTABLE_LIMIT_SLOWDOWNS
+ROCKSDB_STALL_TOTAL_STOPS
+ROCKSDB_STALL_TOTAL_SLOWDOWNS
+ROCKSDB_STALL_MICROS
ROCKSDB_WAL_BYTES
ROCKSDB_WAL_GROUP_SYNCS
ROCKSDB_WAL_SYNCED
@@ -1752,6 +1815,7 @@ INSERT INTO t1 SET id=123, blob_col='' ON DUPLICATE KEY UPDATE bl
DROP TABLE t1;
#
# Issue #17: Automatic per-index column families
+# (Now deprecated)
#
create table t1 (
id int not null,
@@ -1759,40 +1823,7 @@ key1 int,
PRIMARY KEY (id),
index (key1) comment '$per_index_cf'
) engine=rocksdb;
-#Same CF ids with different CF flags
-create table t1_err (
-id int not null,
-key1 int,
-PRIMARY KEY (id),
-index (key1) comment 'test.t1.key1'
-) engine=rocksdb;
-ERROR HY000: Column family ('test.t1.key1') flag (0) is different from an existing flag (2). Assign a new CF flag, or do not change existing CF flag.
-create table t1_err (
-id int not null,
-key1 int,
-PRIMARY KEY (id),
-index (key1) comment 'test.t1.key2'
-) engine=rocksdb;
-drop table t1_err;
-# Unfortunately there is no way to check which column family everything goes to
-insert into t1 values (1,1);
-select * from t1;
-id key1
-1 1
-# Check that ALTER and RENAME are disallowed
-alter table t1 add col2 int;
-ERROR 42000: This version of MariaDB doesn't yet support 'ALTER TABLE on table with per-index CF'
-rename table t1 to t2;
-ERROR 42000: This version of MariaDB doesn't yet support 'ALTER TABLE on table with per-index CF'
-drop table t1;
-# Check detection of typos in $per_index_cf
-create table t1 (
-id int not null,
-key1 int,
-PRIMARY KEY (id),
-index (key1) comment '$per_idnex_cf'
-)engine=rocksdb;
-ERROR 42000: This version of MariaDB doesn't yet support 'column family name looks like a typo of $per_index_cf.'
+ERROR HY000: The per-index column family option has been deprecated
#
# Issue #22: SELECT ... FOR UPDATE takes a long time
#
@@ -2174,7 +2205,9 @@ SET @old_mode = @@sql_mode;
SET sql_mode = 'strict_all_tables';
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
drop table t1;
+set global rocksdb_large_prefix=1;
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
select * from t1;
a b c
@@ -2268,6 +2301,7 @@ INSERT INTO t1 VALUES(1, 2);
INSERT INTO t1 VALUES(1, 3);
SELECT * FROM t1;
id value
+1 3
REPLACE INTO t1 VALUES(4, 4);
ERROR HY000: When unique checking is disabled in MyRocks, INSERT,UPDATE,LOAD statements with clauses that update or replace the key (i.e. INSERT ON DUPLICATE KEY UPDATE, REPLACE) are not allowed. Query: REPLACE INTO t1 VALUES(4, 4)
INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result
index 05ac3f4f62d..99186153796 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result
@@ -348,6 +348,9 @@ ALTER TABLE t2 ADD KEY (`col3`, `col4`) COMMENT 'custom_p5_cfname=another_cf_for
SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='another_cf_for_p5';
cf_name
another_cf_for_p5
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col3 = 0x4 AND col2 = 0x34567;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t2 custom_p2 ref col3 col3 258 const 1 Using where
@@ -407,3 +410,14 @@ cf_name
notsharedcf
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
+CREATE TABLE t1 (
+a INT NOT NULL,
+PRIMARY KEY (a) COMMENT 'p1_cfname=foo;'
+) ENGINE=ROCKSDB
+PARTITION BY LIST COLUMNS(a)
+(PARTITION p1 VALUES IN (1) ENGINE = ROCKSDB);
+INSERT INTO t1 values (1);
+TRUNCATE TABLE t1;
+SELECT * FROM t1;
+a
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result
index 925cd2c60db..c09959736dd 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result
@@ -57,7 +57,7 @@ UPDATE t1 SET value=30 WHERE id=3;
COMMIT;
connection con1;
SELECT * FROM t1 WHERE id=3 FOR UPDATE;
-ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict)
ROLLBACK;
disconnect con1;
connection default;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result b/storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result
index b0a1c408006..1b872f82c58 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result
@@ -1,5 +1,6 @@
call mtr.add_suppression("Did not write failed ");
call mtr.add_suppression("Can't open and lock privilege tables");
+call mtr.add_suppression("Attempt to delete the trigger file");
SET @ORIG_EVENT_SCHEDULER = @@EVENT_SCHEDULER;
CREATE TABLE mysql.user_temp LIKE mysql.user;
INSERT mysql.user_temp SELECT * FROM mysql.user;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
index 19d794da848..1bcd3692b4a 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/show_engine.result
@@ -7,14 +7,8 @@ CREATE TABLE t2 (j INT, PRIMARY KEY (j) COMMENT 'rev:cf_t2') ENGINE = ROCKSDB;
CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
PARTITION BY KEY(l) PARTITIONS 4;
-SHOW ENGINE rocksdb STATUS;
-Type Name Status
-DBSTATS rocksdb #
-CF_COMPACTION __system__ #
-CF_COMPACTION cf_t1 #
-CF_COMPACTION default #
-CF_COMPACTION rev:cf_t2 #
-Memory_Stats rocksdb #
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
INSERT INTO t1 VALUES (1), (2), (3);
SELECT COUNT(*) FROM t1;
COUNT(*)
@@ -27,6 +21,19 @@ INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
SELECT COUNT(*) FROM t4;
COUNT(*)
5
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+SHOW ENGINE rocksdb STATUS;
+Type Name Status
+STATISTICS # #
+DBSTATS # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+CF_COMPACTION # #
+MEMORY_STATS # #
+BG_THREADS # #
+BG_THREADS # #
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
CF_NAME STAT_TYPE VALUE
__system__ NUM_IMMUTABLE_MEM_TABLE #
@@ -125,19 +132,30 @@ __system__ PREFIX_EXTRACTOR #
__system__ COMPACTION_STYLE #
__system__ COMPACTION_OPTIONS_UNIVERSAL #
__system__ COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
-__system__ BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
-__system__ BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE #
-__system__ BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
-__system__ BLOCK_BASED_TABLE_FACTORY::CHECKSUM #
-__system__ BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE #
-__system__ BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY #
-__system__ BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING #
-__system__ BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE #
-__system__ BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
-__system__ BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE #
-__system__ BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
-__system__ BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
-__system__ BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION #
+__system__ TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+__system__ TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+__system__ TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+__system__ TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+__system__ TABLE_FACTORY::INDEX_TYPE #
+__system__ TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+__system__ TABLE_FACTORY::CHECKSUM #
+__system__ TABLE_FACTORY::NO_BLOCK_CACHE #
+__system__ TABLE_FACTORY::BLOCK_CACHE #
+__system__ TABLE_FACTORY::BLOCK_CACHE_NAME #
+__system__ TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+__system__ TABLE_FACTORY::CAPACITY #
+__system__ TABLE_FACTORY::NUM_SHARD_BITS #
+__system__ TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+__system__ TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+__system__ TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+__system__ TABLE_FACTORY::PERSISTENT_CACHE #
+__system__ TABLE_FACTORY::BLOCK_SIZE #
+__system__ TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+__system__ TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+__system__ TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+__system__ TABLE_FACTORY::FILTER_POLICY #
+__system__ TABLE_FACTORY::WHOLE_KEY_FILTERING #
+__system__ TABLE_FACTORY::FORMAT_VERSION #
cf_t1 COMPARATOR #
cf_t1 MERGE_OPERATOR #
cf_t1 COMPACTION_FILTER #
@@ -179,19 +197,30 @@ cf_t1 PREFIX_EXTRACTOR #
cf_t1 COMPACTION_STYLE #
cf_t1 COMPACTION_OPTIONS_UNIVERSAL #
cf_t1 COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::CHECKSUM #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
-cf_t1 BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION #
+cf_t1 TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+cf_t1 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+cf_t1 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+cf_t1 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+cf_t1 TABLE_FACTORY::INDEX_TYPE #
+cf_t1 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+cf_t1 TABLE_FACTORY::CHECKSUM #
+cf_t1 TABLE_FACTORY::NO_BLOCK_CACHE #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE_NAME #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+cf_t1 TABLE_FACTORY::CAPACITY #
+cf_t1 TABLE_FACTORY::NUM_SHARD_BITS #
+cf_t1 TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+cf_t1 TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+cf_t1 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+cf_t1 TABLE_FACTORY::PERSISTENT_CACHE #
+cf_t1 TABLE_FACTORY::BLOCK_SIZE #
+cf_t1 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+cf_t1 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+cf_t1 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+cf_t1 TABLE_FACTORY::FILTER_POLICY #
+cf_t1 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+cf_t1 TABLE_FACTORY::FORMAT_VERSION #
default COMPARATOR #
default MERGE_OPERATOR #
default COMPACTION_FILTER #
@@ -233,19 +262,30 @@ default PREFIX_EXTRACTOR #
default COMPACTION_STYLE #
default COMPACTION_OPTIONS_UNIVERSAL #
default COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
-default BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
-default BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE #
-default BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
-default BLOCK_BASED_TABLE_FACTORY::CHECKSUM #
-default BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE #
-default BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY #
-default BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING #
-default BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE #
-default BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
-default BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE #
-default BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
-default BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
-default BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION #
+default TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+default TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+default TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+default TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+default TABLE_FACTORY::INDEX_TYPE #
+default TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+default TABLE_FACTORY::CHECKSUM #
+default TABLE_FACTORY::NO_BLOCK_CACHE #
+default TABLE_FACTORY::BLOCK_CACHE #
+default TABLE_FACTORY::BLOCK_CACHE_NAME #
+default TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+default TABLE_FACTORY::CAPACITY #
+default TABLE_FACTORY::NUM_SHARD_BITS #
+default TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+default TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+default TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+default TABLE_FACTORY::PERSISTENT_CACHE #
+default TABLE_FACTORY::BLOCK_SIZE #
+default TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+default TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+default TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+default TABLE_FACTORY::FILTER_POLICY #
+default TABLE_FACTORY::WHOLE_KEY_FILTERING #
+default TABLE_FACTORY::FORMAT_VERSION #
rev:cf_t2 COMPARATOR #
rev:cf_t2 MERGE_OPERATOR #
rev:cf_t2 COMPACTION_FILTER #
@@ -287,19 +327,30 @@ rev:cf_t2 PREFIX_EXTRACTOR #
rev:cf_t2 COMPACTION_STYLE #
rev:cf_t2 COMPACTION_OPTIONS_UNIVERSAL #
rev:cf_t2 COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::CHECKSUM #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
-rev:cf_t2 BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION #
+rev:cf_t2 TABLE_FACTORY::FLUSH_BLOCK_POLICY_FACTORY #
+rev:cf_t2 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS #
+rev:cf_t2 TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS_WITH_HIGH_PRIORITY #
+rev:cf_t2 TABLE_FACTORY::PIN_L0_FILTER_AND_INDEX_BLOCKS_IN_CACHE #
+rev:cf_t2 TABLE_FACTORY::INDEX_TYPE #
+rev:cf_t2 TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION #
+rev:cf_t2 TABLE_FACTORY::CHECKSUM #
+rev:cf_t2 TABLE_FACTORY::NO_BLOCK_CACHE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_NAME #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_OPTIONS #
+rev:cf_t2 TABLE_FACTORY::CAPACITY #
+rev:cf_t2 TABLE_FACTORY::NUM_SHARD_BITS #
+rev:cf_t2 TABLE_FACTORY::STRICT_CAPACITY_LIMIT #
+rev:cf_t2 TABLE_FACTORY::HIGH_PRI_POOL_RATIO #
+rev:cf_t2 TABLE_FACTORY::BLOCK_CACHE_COMPRESSED #
+rev:cf_t2 TABLE_FACTORY::PERSISTENT_CACHE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE #
+rev:cf_t2 TABLE_FACTORY::BLOCK_SIZE_DEVIATION #
+rev:cf_t2 TABLE_FACTORY::BLOCK_RESTART_INTERVAL #
+rev:cf_t2 TABLE_FACTORY::INDEX_BLOCK_RESTART_INTERVAL #
+rev:cf_t2 TABLE_FACTORY::FILTER_POLICY #
+rev:cf_t2 TABLE_FACTORY::WHOLE_KEY_FILTERING #
+rev:cf_t2 TABLE_FACTORY::FORMAT_VERSION #
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
@@ -309,7 +360,7 @@ Type Name Status
SHOW ENGINE ALL MUTEX;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -317,6 +368,7 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
SNAPSHOTS
---------
LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
@@ -324,7 +376,7 @@ END OF ROCKSDB TRANSACTION MONITOR OUTPUT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
SHOW ENGINE rocksdb TRANSACTION STATUS;
Type Name Status
-SNAPSHOTS rocksdb
+rocksdb
============================================================
TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
============================================================
@@ -336,8 +388,11 @@ LIST OF SNAPSHOTS FOR EACH SESSION:
MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION
SHOW ENGINE rocksdb TRANSACTION STATUS
lock count 0, write count 0
+insert count 0, update count 0, delete count 0
+----------LATEST DETECTED DEADLOCKS----------
-----------------------------------------
END OF ROCKSDB TRANSACTION MONITOR OUTPUT
=========================================
ROLLBACK;
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/singledelete.result b/storage/rocksdb/mysql-test/rocksdb/r/singledelete.result
index ef9fafc852a..da29f325410 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/singledelete.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/singledelete.result
@@ -63,4 +63,24 @@ true
select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
case when variable_value-@d < 10 then 'true' else 'false' end
true
-DROP TABLE t1, t2, t3, t4, t5;
+CREATE TABLE t6 (
+pk VARCHAR(64) COLLATE latin1_swedish_ci PRIMARY KEY
+) ENGINE=RocksDB;
+INSERT INTO t6 VALUES ('a');
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t6;
+pk
+a
+UPDATE t6 SET pk='A' WHERE pk='a';
+SELECT * FROM t6;
+pk
+A
+DELETE FROM t6 where pk='A';
+SELECT should return nothing;
+SELECT * FROM t6;
+pk
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SELECT should return nothing;
+SELECT * FROM t6;
+pk
+DROP TABLE t1, t2, t3, t4, t5, t6;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
new file mode 100644
index 00000000000..36d5eb24e30
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result
@@ -0,0 +1,4 @@
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+set session debug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug= "-d,gen_sql_table_name";
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
index d1e445f734c..9cb7e601e65 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result
@@ -21,7 +21,7 @@ PARTITION P1 VALUES LESS THAN (2000)
DATA DIRECTORY = '/foo/bar/data/',
PARTITION P2 VALUES LESS THAN (MAXVALUE)
);
-ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options")
+ERROR HY000: Got error 195 'Specifying DATA DIRECTORY for an individual table is not supported.' from ROCKSDB
CREATE TABLE t1 (id int not null primary key) ENGINE=rocksdb PARTITION BY RANGE (id)
(
PARTITION P0 VALUES LESS THAN (1000)
@@ -30,4 +30,4 @@ PARTITION P1 VALUES LESS THAN (2000)
INDEX DIRECTORY = '/foo/bar/data/',
PARTITION P2 VALUES LESS THAN (MAXVALUE)
);
-ERROR HY000: Can't create table `test`.`t1` (errno: 140 "Wrong create options")
+ERROR HY000: Got error 196 'Specifying INDEX DIRECTORY for an individual table is not supported.' from ROCKSDB
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
index 813f651be62..eda560fefdb 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result
@@ -1,6 +1,8 @@
call mtr.add_suppression("Column family 'cf1' not found");
call mtr.add_suppression("Column family 'rev:cf2' not found");
DROP TABLE IF EXISTS t1;
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
set global rocksdb_signal_drop_index_thread = 1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
new file mode 100644
index 00000000000..2903e9aa7eb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result
@@ -0,0 +1,489 @@
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int,
+c int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1;
+a
+1
+2
+3
+4
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+3
+4
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1;
+a
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20),
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts int,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+ERROR HY000: TTL duration (abc) in MyRocks must be an unsigned non-null 64-bit integer.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+INSERT INTO t1 values (1);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1);
+SELECT * FROM t1;
+a
+1
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='ttl_duration=1'
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+ALTER TABLE t1 DROP PRIMARY KEY;
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int,
+PRIMARY KEY (a,b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+UPDATE t1 SET a=a+1;
+SELECT * FROM t1;
+a
+10
+2
+4
+6
+8
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1;
+a
+10
+8
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=UNIX_TIMESTAMP() WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+3
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
new file mode 100644
index 00000000000..0a91fe3fcbd
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result
@@ -0,0 +1,238 @@
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1;
+a
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b BIGINT UNSIGNED NOT NULL
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT a FROM t1;
+a
+2
+3
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+2
+3
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+SELECT a FROM t1;
+a
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+a
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_enable_ttl_read_filtering=1;
+SELECT * FROM t1;
+a
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+c int,
+PRIMARY KEY (a,b,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 WHERE a = 1;
+a b c
+SELECT max(a) from t1 where a < 3;
+max(a)
+NULL
+SELECT max(a) from t1 where a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT min(a) from t1 where a >= 1;
+min(a)
+NULL
+SELECT min(a) from t1 where a > 1;
+min(a)
+NULL
+select * from t1 where a=1 and b in (1) order by c desc;
+a b c
+select max(a) from t1 where a <=10;
+max(a)
+NULL
+select a from t1 where a > 0 and a <= 2;
+a
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+8
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+a
+INSERT INTO t1 values (1);
+SELECT * FROM t1;
+a
+1
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+a
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (3,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (5,5);
+UPDATE t1 set a = 1;
+SELECT * FROM t1;
+a b
+1 5
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1;
+a b
+1 5
+3 3
+set global rocksdb_enable_ttl_read_filtering=1;
+UPDATE t1 set a = 999 where a = 1;
+SELECT * FROM t1;
+a b
+999 5
+UPDATE t1 set a = a - 1;
+SELECT * FROM t1;
+a b
+998 5
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 values (1);
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1;
+a
+1
+SELECT * FROM t1;
+a
+1
+# Switching to connection 2
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1;
+a
+# Switching to connection 1
+SELECT * FROM t1;
+a
+1
+UPDATE t1 set a = a + 1;
+SELECT * FROM t1;
+a
+2
+COMMIT;
+SELECT * FROM t1;
+a
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+# On Connection 1
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1;
+a
+# On Connection 2
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# On Connection 1
+SELECT * FROM t1;
+a
+# On Connection 2
+SELECT * FROM t1;
+a
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_enable_ttl_read_filtering=1;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result
new file mode 100644
index 00000000000..3816accad8c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result
@@ -0,0 +1,256 @@
+CREATE TABLE t1 (
+c1 INT,
+PRIMARY KEY (`c1`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (4);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (6);
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (8);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+c1
+1
+2
+3
+4
+5
+6
+7
+8
+9
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1;
+c1
+2
+3
+5
+6
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1;
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+SELECT * FROM t1;
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+SELECT * FROM t1;
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1;
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` int(11) NOT NULL DEFAULT '0',
+ `c2` int(11) NOT NULL DEFAULT '0',
+ `name` varchar(25) NOT NULL,
+ `event` date DEFAULT NULL,
+ PRIMARY KEY (`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;'
+/*!50100 PARTITION BY LIST (c1)
+(PARTITION custom_p0 VALUES IN (1,2,3) ENGINE = ROCKSDB,
+ PARTITION custom_p1 VALUES IN (4,5,6) ENGINE = ROCKSDB,
+ PARTITION custom_p2 VALUES IN (7,8,9) ENGINE = ROCKSDB) */
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1;
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+SELECT c1 FROM t1;
+c1
+4
+5
+6
+7
+8
+9
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1;
+c1
+7
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+PRIMARY KEY (`c1`, `c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=100;ttl_col=c2;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1),
+PARTITION custom_p1 VALUES IN (2),
+PARTITION custom_p2 VALUES IN (3)
+);
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT c1 FROM t1;
+c1
+1
+2
+3
+set global rocksdb_debug_ttl_snapshot_ts = 300;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1;
+c1
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
new file mode 100644
index 00000000000..1f748a3841a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result
@@ -0,0 +1,709 @@
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int,
+c int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a, ts),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+2
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+3
+4
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts bigint(20),
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+ts int,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+ERROR HY000: TTL column (ts) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+ERROR HY000: TTL duration (abc) in MyRocks must be an unsigned non-null 64-bit integer.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a,c),
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+ERROR HY000: TTL column (abc) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration.
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) NOT NULL,
+ `b` int(11) NOT NULL,
+ PRIMARY KEY (`a`),
+ KEY `kb` (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='ttl_duration=1'
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+ALTER TABLE t1 DROP PRIMARY KEY;
+ERROR HY000: TTL support is currently disabled when table has a hidden PK.
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+2
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) UNSIGNED NOT NULL,
+b int,
+PRIMARY KEY (a,b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+UPDATE t1 SET a=a+1;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+10 0
+2 0
+4 0
+6 0
+8 0
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT * FROM t1;
+a b
+10 0
+8 0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+6
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a bigint(20) NOT NULL,
+b int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+5
+7
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a INT,
+b bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT a FROM t1;
+a
+1
+3
+5
+7
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT a FROM t1;
+a
+1
+3
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*);
+COUNT(*)
+1
+CREATE INDEX kb on t1 (b);
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+COUNT(*)
+0
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
new file mode 100644
index 00000000000..735181d19a2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result
@@ -0,0 +1,494 @@
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b BIGINT UNSIGNED NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# 1 should be hidden
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# none should be hidden yet, compaction runs but records aren't expired
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+2
+3
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+2
+3
+# all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT a FROM t1 FORCE INDEX (kb);
+a
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+# should return everything
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+# should return nothing.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
+# Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+a int,
+b int,
+c int,
+PRIMARY KEY (a,b,c),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+a b c
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+max(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+min(a)
+NULL
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+min(a)
+NULL
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+a b c
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+max(a)
+NULL
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+max(a)
+NULL
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+a
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+a
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+0
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+variable_value-@c
+8
+DROP TABLE t1;
+# Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a
+SELECT * FROM t1;
+a
+# No error is thrown here, under the hood index_next_with_direction is
+# filtering out the record from being seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+# Ensure no rows can disappear in the middle of long-running transactions
+# Also ensure repeatable-read works as expected
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+INSERT INTO t1 values (1, 1);
+# Creating Snapshot (start transaction)
+BEGIN;
+# Nothing filtered out here
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+# Switching to connection 2
+# compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+# read filtered out, because on a different connection, on
+# this connection the records have 'expired' already so they are filtered out
+# even though they have not yet been removed by compaction
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# Switching to connection 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+UPDATE t1 set a = a + 1;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+2 1
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+2 1
+COMMIT;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+DROP TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+a int PRIMARY KEY,
+b int NOT NULL,
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+# On Connection 1
+# Creating Snapshot (start transaction)
+BEGIN;
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# On Connection 1
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+# On Connection 2
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+set global rocksdb_enable_ttl_read_filtering=0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b
+1 1
+3 3
+5 5
+7 7
+SELECT * FROM t1 FORCE INDEX (kb);
+a b
+1 1
+3 3
+5 5
+7 7
+set global rocksdb_enable_ttl_read_filtering=1;
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+ts bigint(20) UNSIGNED NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+# None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+a b
+1 1
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+10 10
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (c1, c2),
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 c
+4 4 d
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+c1 c2 name
+7 7 g
+8 8 h
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+c1 c2 name
+4 4 d
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+c1 c2 name
+4 4 d
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int,
+b int,
+PRIMARY KEY (a),
+KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
+# Should see everything
+SELECT * FROM t1;
+a b
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+11 11
+12 12
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+# Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+a b
+3 3
+4 4
+5 5
+6 6
+8 8
+9 9
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+a b
+6 6
+8 8
+9 9
+13 13
+14 14
+# Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+a b
+5 5
+6 6
+9 9
+13 13
+14 14
+15 15
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+a b
+5 5
+6 6
+9 9
+# Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+a b
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
new file mode 100644
index 00000000000..e4c361576f5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result
@@ -0,0 +1,82 @@
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
+CREATE TABLE t1 (
+a int NOT NULL,
+b int NOT NULL,
+c int NOT NULL,
+PRIMARY KEY (a),
+KEY kb (b) COMMENT 'kb',
+KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kb);
+a b c
+3 3 3
+SELECT * FROM t1 FORCE INDEX (kc);
+a b c
+3 3 3
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
new file mode 100644
index 00000000000..442dc1167d9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result
@@ -0,0 +1,389 @@
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`),
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+1 1 a
+2 2 aa
+3 3 aaa
+4 4 aaaa
+5 5 aaaaa
+6 6 aaaaaa
+7 7 aaaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+2 2 aa
+3 3 aaa
+5 5 aaaaa
+6 6 aaaaaa
+8 8 aaaaaaaa
+9 9 aaaaaaaaa
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name
+3 3 aaa
+6 6 aaaaaa
+9 9 aaaaaaaaa
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` int(11) NOT NULL DEFAULT '0',
+ `c2` int(11) NOT NULL DEFAULT '0',
+ `name` varchar(25) NOT NULL,
+ `event` date DEFAULT NULL,
+ PRIMARY KEY (`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY `kc2` (`c2`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COMMENT='custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;'
+/*!50100 PARTITION BY LIST (c1)
+(PARTITION custom_p0 VALUES IN (1,2,3) ENGINE = ROCKSDB,
+ PARTITION custom_p1 VALUES IN (4,5,6) ENGINE = ROCKSDB,
+ PARTITION custom_p2 VALUES IN (7,8,9) ENGINE = ROCKSDB) */
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+7 7 seven NULL
+8 8 eight NULL
+9 9 nine NULL
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2 name event
+1 1 one NULL
+2 2 two NULL
+3 3 three NULL
+4 4 four NULL
+5 5 five NULL
+6 6 six NULL
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 BIGINT,
+c2 BIGINT UNSIGNED NOT NULL,
+name VARCHAR(25) NOT NULL,
+event DATE,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 2, 3),
+PARTITION custom_p1 VALUES IN (4, 5, 6),
+PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+4
+5
+6
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+4
+5
+6
+7
+8
+9
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+c1
+7
+8
+9
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+c1
+7
+8
+9
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 INT,
+c2 INT,
+PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+PARTITION custom_p0 VALUES IN (1, 4, 7),
+PARTITION custom_p1 VALUES IN (2, 5, 8),
+PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+SELECT * FROM t1;
+c1 c2
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+CREATE INDEX kc2 on t1 (c2);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+SELECT * FROM t1 FORCE INDEX (kc2);
+c1 c2
+2 2
+3 3
+5 5
+6 6
+8 8
+9 9
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result b/storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result
index 62a3004e584..391649e0e3b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result
@@ -77,4 +77,39 @@ a
Africa,Europe,Asia
S.America,Europe
DROP TABLE t1;
+CREATE TABLE t1 (
+a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
+b SET('test1','test2','test3','test4','test5'),
+c SET('01','22','23','33','34','39','40','44','50','63','64'),
+pk SET('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
+UNIQUE INDEX b_a (b,a)
+) ENGINE=rocksdb;
+SHOW INDEX IN t1;
+Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
+t1 0 PRIMARY 1 pk A 1000 NULL NULL LSMTREE
+t1 0 b_a 1 b A 500 NULL NULL YES LSMTREE
+t1 0 b_a 2 a A 1000 NULL NULL YES LSMTREE
+INSERT INTO t1 (a,b,c,pk) VALUES
+('','test2,test3','01,34,44,23',1),
+('',5,2,2),
+('N.America,Asia','test4,test2','',3),
+('Africa,Europe,Asia','test2,test3','01',4),
+('Antarctica','test3','34,44',5),
+('Asia','test5','50',6),
+('Europe,S.America','test1,','39',7);
+Warnings:
+Warning 1265 Data truncated for column 'b' at row 7
+EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL # Using temporary; Using filesort
+SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
+a b
+ test1,test3
+ test2,test3
+Africa,Europe,Asia test2,test3
+Antarctica test3
+Asia test5
+N.America,Asia test2,test4
+S.America,Europe test1
+DROP TABLE t1;
SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
index b329718e4e0..365a9b7b3b3 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result
@@ -190,6 +190,9 @@ insert into t1 values (0, 'ab', 'a-b');
insert into t1 values (1, 'a ', 'a-space');
insert into t1 values (2, 'a', 'a');
insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
# Must show 'using index' for latin1_bin and utf8_bin:
explain
select col1, hex(col1) from t1;
@@ -302,6 +305,9 @@ insert into t1 values (0, 'ab', 'a-b');
insert into t1 values (1, 'a ', 'a-space');
insert into t1 values (2, 'a', 'a');
insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
# Must show 'using index' for latin1_bin and utf8_bin:
explain
select col1, hex(col1) from t1;
@@ -414,6 +420,9 @@ insert into t1 values (0, 'ab', 'a-b');
insert into t1 values (1, 'a ', 'a-space');
insert into t1 values (2, 'a', 'a');
insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
# Must show 'using index' for latin1_bin and utf8_bin:
explain
select col1, hex(col1) from t1;
@@ -526,6 +535,9 @@ insert into t1 values (0, 'ab', 'a-b');
insert into t1 values (1, 'a ', 'a-space');
insert into t1 values (2, 'a', 'a');
insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
# Must show 'using index' for latin1_bin and utf8_bin:
explain
select col1, hex(col1) from t1;
@@ -638,6 +650,9 @@ insert into t1 values (0, 'ab', 'a-b');
insert into t1 values (1, 'a ', 'a-space');
insert into t1 values (2, 'a', 'a');
insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
# Must show 'using index' for latin1_bin and utf8_bin:
explain
select col1, hex(col1) from t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
index 59fb1e41bdd..414e6279ab3 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result
@@ -1,4 +1,4 @@
call mtr.add_suppression("rocksdb");
call mtr.add_suppression("Aborting");
FOUND 1 /enable both use_direct_reads/ in mysqld.1.err
-FOUND 1 /enable both use_direct_writes/ in mysqld.1.err
+FOUND 1 /enable both use_direct_io_for_flush_and_compaction/ in mysqld.1.err
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result b/storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result
new file mode 100644
index 00000000000..9362d42515c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result
@@ -0,0 +1,260 @@
+CREATE TABLE t1(
+vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+SELECT hex(vb) FROM t1;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+000000000000000001
+000000000000000002
+000000000000000003
+000000000000000004
+000000000000000005
+000000000000000006
+000000000000000007
+000000000000000008
+000000000000000009000000000000000001
+000000000000000009000000000000000002
+000000000000000009000000000000000009000000000000000009000000000000000007
+000000000000000009000000000000000009000000000000000009000000000000000008
+000000000000000009000000000000000009000000000000000009000000000000000009000000000000000001
+ROLLBACK;
+DROP TABLE t1;
+set session debug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+SELECT hex(vb) FROM t1;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+hex(vb)
+00
+0000
+000000
+00000000
+0000000000
+000000000000
+00000000000000
+0000000000000000
+000000000000000000
+00000000000000000000
+00000000000000000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+0000000000000000f8
+0000000000000000f9
+0000000000000000fa
+0000000000000000fb
+0000000000000000fc
+0000000000000000fd
+0000000000000000fe
+0000000000000000ff0000000000000000f7
+0000000000000000ff0000000000000000f8
+0000000000000000ff0000000000000000f9
+0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000fe
+0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000f7
+0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000ff0000000000000000f8
+ROLLBACK;
+DROP TABLE t1;
+CREATE TABLE t1(
+vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+SELECT * FROM t1;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+610000000000000001
+616100000000000002
+616161000000000003
+616161610000000004
+616161616100000005
+616161616161000006
+616161616161610007
+616161616161616108
+616161616161616109610000000000000001
+616161616161616109616100000000000002
+616161616161616109616161616161616109616161616161616109616161616161610007
+616161616161616109616161616161616109616161616161616109616161616161616108
+616161616161616109616161616161616109616161616161616109616161616161616109610000000000000001
+ROLLBACK;
+DROP TABLE t1;
+set session debug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+SELECT * FROM t1;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+vc
+a
+aa
+aaa
+aaaa
+aaaaa
+aaaaaa
+aaaaaaa
+aaaaaaaa
+aaaaaaaaa
+aaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+SUBSTRING(a.key,9)
+6100000000000000f8
+6161000000000000f9
+6161610000000000fa
+6161616100000000fb
+6161616161000000fc
+6161616161610000fd
+6161616161616100fe
+6161616161616161ff0000000000000000f7
+6161616161616161ff6100000000000000f8
+6161616161616161ff6161000000000000f9
+6161616161616161ff6161616161616161ff6161616161616161ff6161616161616100fe
+6161616161616161ff6161616161616161ff6161616161616161ff6161616161616161ff0000000000000000f7
+6161616161616161ff6161616161616161ff6161616161616161ff6161616161616161ff6100000000000000f8
+ROLLBACK;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
index 8f8495302e7..ee23446eec0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/write_sync.result
@@ -1,40 +1,29 @@
SET GLOBAL rocksdb_write_disable_wal=false;
SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
-set @save_rocksdb_flush_log_at_trx_commit= @@global.rocksdb_flush_log_at_trx_commit;
-SET LOCAL rocksdb_flush_log_at_trx_commit=0;
+set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
-0
+1
insert aaa(id, i) values(2,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
-0
+2
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
-0
-SET LOCAL rocksdb_flush_log_at_trx_commit=1;
+3
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(4,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
-1
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(5,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
-2
-insert aaa(id, i) values(6,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-variable_value-@a
-3
-SET GLOBAL rocksdb_background_sync=on;
-SET LOCAL rocksdb_flush_log_at_trx_commit=0;
-insert aaa(id, i) values(7,1);
truncate table aaa;
drop table aaa;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
+set @@global.rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_write_disable_wal=false;
SET GLOBAL rocksdb_write_ignore_missing_column_families=false;
-SET GLOBAL rocksdb_background_sync=off;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
index 009fd7beaf2..923c9db17f2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test
@@ -13,7 +13,7 @@ USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
--echo # 2PC enabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
--echo ## 2PC + durability + single thread
@@ -28,7 +28,7 @@ select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true'
--echo # 2PC enabled, MyRocks durability disabled
-SET GLOBAL rocksdb_enable_2pc=0;
+SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
@@ -41,7 +41,7 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa
--echo # 2PC disabled, MyRocks durability enabled
-SET GLOBAL rocksdb_enable_2pc=1;
+SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
index bb95f71456c..c1a91c2a5a2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test
@@ -173,7 +173,7 @@ set @tmp_rocksdb_strict_collation_check= @@rocksdb_strict_collation_check;
set global rocksdb_strict_collation_check=1;
CREATE TABLE t1 (a INT, b TEXT);
---error 1105
+--error ER_UNSUPPORTED_COLLATION
ALTER TABLE t1 ADD KEY kb(b(10));
ALTER TABLE t1 ADD PRIMARY KEY(a);
DROP TABLE t1;
@@ -190,8 +190,8 @@ connect (con1,localhost,root,,);
--echo # Switch to connection con1
connection con1;
-show global variables like 'rocksdb_bulk_load';
-show session variables like 'rocksdb_bulk_load';
+show global variables like 'rocksdb_bulk_load%';
+show session variables like 'rocksdb_bulk_load%';
CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
@@ -259,9 +259,9 @@ SET @prior_rocksdb_merge_buf_size = @@rocksdb_merge_buf_size;
SET global rocksdb_strict_collation_check = off;
SET session rocksdb_merge_combine_read_size = 566;
-SET session rocksdb_merge_buf_size = 336;
+SET session rocksdb_merge_buf_size = 340;
-show variables like '%rocksdb_bulk_load%';
+show variables like 'rocksdb_bulk_load%';
CREATE TABLE t1 (a VARCHAR(80)) ENGINE=RocksDB;
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
INSERT INTO t1 (a) VALUES (REPEAT("a", 80));
@@ -345,3 +345,48 @@ while ($i <= $max) {
#SHOW TABLE STATUS WHERE name LIKE 't1';
DROP TABLE t1;
+
+# https://github.com/facebook/mysql-5.6/issues/602
+# Add then drop same index should be optimized out.
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,3,3);
+SHOW CREATE TABLE t1;
+
+ALTER TABLE t1 DROP INDEX kbc, ADD INDEX kbc(b,c), ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX kbc;
+DROP TABLE t1;
+
+# Make sure changing key part prefix length causes index rebuild as well.
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b varchar(10),
+index kb(b(5))
+) ENGINE = ROCKSDB charset utf8 collate utf8_bin;
+
+INSERT INTO t1 (a,b) VALUES (1,'1111122222');
+INSERT INTO t1 (a,b) VALUES (2,'2222233333');
+INSERT INTO t1 (a,b) VALUES (3,'3333344444');
+
+--let $start_max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+
+ALTER TABLE t1 DROP INDEX kb, ADD INDEX kb(b(8)), ALGORITHM=INPLACE;
+SELECT * FROM t1 FORCE INDEX(kb);
+
+--let $end_max_index_id = query_get_value(SELECT * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where type = 'MAX_INDEX_ID', VALUE, 1)
+
+if ($end_max_index_id <= $start_max_index_id) {
+ echo Max index ID did not increase;
+}
+
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test b/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test
index 375a63c3a38..cf0546d182c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test
@@ -72,11 +72,30 @@ INSERT INTO t1 (a, b) VALUES (1, 5);
INSERT INTO t1 (a, b) VALUES (2, 6);
INSERT INTO t1 (a, b) VALUES (3, 7);
-# should fail, can't add unique index on table w/ no pk
---error 1105
+# should succeed, we can add unique index on table w/ no pk
ALTER TABLE t1 ADD UNIQUE INDEX kb(b);
+INSERT INTO t1 (a, b) VALUES (4, 8);
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a, b) VALUES (5, 5);
SHOW CREATE TABLE t1;
DROP TABLE t1;
+# Test changing key with same name and parts from not unique to unique, should
+# fail.
+CREATE TABLE t1 (
+a INT PRIMARY KEY,
+b INT,
+c INT,
+KEY kbc(b,c)) ENGINE = ROCKSDB;
+INSERT INTO t1 (a,b,c) VALUES (1,1,1);
+INSERT INTO t1 (a,b,c) VALUES (2,2,2);
+INSERT INTO t1 (a,b,c) VALUES (3,2,2);
+
+# should cause error here, duplicate value 2-2
+--error 1062
+ALTER TABLE t1 DROP INDEX kbc, ADD UNIQUE INDEX kbc(b,c), ALGORITHM=INPLACE;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test
index d1fe15b98fe..2a064dc3b00 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test
@@ -71,9 +71,16 @@ CHECK TABLE t1 CHANGED;
DROP TABLE t1, t2;
-# test disabling unique keys
---error 1105
+# test unique keys with no primary key
CREATE TABLE t1 (a INT, b CHAR(8), UNIQUE INDEX(a)) ENGINE=rocksdb;
+INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
+--error ER_DUP_ENTRY
+INSERT INTO t1 (a,b) VALUES (1,'c');
+SELECT * FROM t1;
+SELECT * FROM t1 WHERE a = 2;
+EXPLAIN SELECT * FROM t1 WHERE a = 2;
+
+DROP TABLE t1;
## test restarting a table that has no data
CREATE TABLE t1 (a INT, b CHAR(8)) ENGINE=rocksdb;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt
index 8600e9e415c..bf9d0624522 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt
@@ -1,2 +1,2 @@
--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:20
---rocksdb_override_cf_options=cf_short_prefix={prefix_extractor=capped:4};cf_long_prefix={prefix_extractor=capped:240}
+--rocksdb_override_cf_options=cf_short_prefix={prefix_extractor=capped:4};rev:cf_short_prefix={prefix_extractor=capped:4};cf_long_prefix={prefix_extractor=capped:240};rev:cf_long_prefix={prefix_extractor=capped:240}
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc
index b388a8036ad..9d62f590c04 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc
@@ -23,12 +23,20 @@ DELIMITER ;//
--source bloomfilter_table_def.inc
--source bloomfilter_load_select.inc
+--exec sed s/##CF##/" COMMENT 'rev:cf_short_prefix'"/g $tmpl_ddl > $ddl
+--source $ddl
+--source suite/rocksdb/t/bloomfilter_load_select.inc
#BF is most of the time invoked and useful
--let $CF=COMMENT 'cf_long_prefix'
--source bloomfilter_table_def.inc
--source bloomfilter_load_select.inc
+--exec sed s/##CF##/" COMMENT 'rev:cf_long_prefix'"/g $tmpl_ddl > $ddl
+--source $ddl
+--source suite/rocksdb/t/bloomfilter_load_select.inc
+
+
# BUG: Prev() with prefix lookup should not use prefix bloom filter
create table r1 (id1 bigint, id2 bigint, id3 bigint, v1 int, v2 text, primary key (id1, id2, id3)) engine=rocksdb DEFAULT CHARSET=latin1 collate latin1_bin;
--disable_query_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc
new file mode 100644
index 00000000000..42cab5ad8c1
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.inc
@@ -0,0 +1,156 @@
+--disable_warnings
+DROP TABLE IF EXISTS t1, t2, t3;
+--enable_warnings
+
+if ($data_order_desc)
+{
+ --echo Data will be ordered in descending order
+}
+
+if (!$data_order_desc)
+{
+ --echo Data will be ordered in ascending order
+}
+
+# Create a table with a primary key and one secondary key as well as one
+# more column
+eval CREATE TABLE t1(
+ pk CHAR(5),
+ a CHAR(30),
+ b CHAR(30),
+ PRIMARY KEY(pk) COMMENT "$pk_cf",
+ KEY(a)
+) COLLATE 'latin1_bin';
+
+# Create a second identical table to validate that bulk loading different
+# tables in the same session works
+eval CREATE TABLE t2(
+ pk CHAR(5),
+ a CHAR(30),
+ b CHAR(30),
+ PRIMARY KEY(pk) COMMENT "$pk_cf",
+ KEY(a)
+) COLLATE 'latin1_bin';
+
+# Create a third table using partitions to validate that bulk loading works
+# across a partitioned table
+eval CREATE TABLE t3(
+ pk CHAR(5),
+ a CHAR(30),
+ b CHAR(30),
+ PRIMARY KEY(pk) COMMENT "$pk_cf",
+ KEY(a)
+) COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+
+--let MTR_DATA_ORDER_DESC = $data_order_desc;
+
+# Create a text file with data to import into the table.
+# The primary key is in sorted order and the secondary keys are randomly generated
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+my $max = 5000000;
+my $desc = $ENV{'MTR_DATA_ORDER_DESC'};
+my @chars = ("A".."Z", "a".."z", "0".."9");
+my @powers_of_26 = (26 * 26 * 26 * 26, 26 * 26 * 26, 26 * 26, 26, 1);
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $pk;
+ my $tmp = $ii;
+ foreach (@powers_of_26)
+ {
+ if ($desc == 1)
+ {
+ $pk .= chr(ord('z') - int($tmp / $_));
+ }
+ else
+ {
+ $pk .= chr(ord('a') + int($tmp / $_));
+ }
+
+ $tmp = $tmp % $_;
+ }
+
+ my $num = int(rand(25)) + 6;
+ my $a;
+ $a .= $chars[rand(@chars)] for 1..$num;
+
+ $num = int(rand(25)) + 6;
+ my $b;
+ $b .= $chars[rand(@chars)] for 1..$num;
+ print $fh "$pk\t$a\t$b\n";
+}
+close($fh);
+EOF
+
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(pk) from t1;
+select count(a) from t1;
+select count(b) from t1;
+select count(pk) from t2;
+select count(a) from t2;
+select count(b) from t2;
+select count(pk) from t3;
+select count(a) from t3;
+select count(b) from t3;
+
+# Create a dummy file with a bulk load extesion. It should be removed when
+# the server starts
+--let $tmpext = .bulk_load.tmp
+--let $MYSQLD_DATADIR= `SELECT @@datadir`
+--let $datadir = $MYSQLD_DATADIR/.rocksdb
+--write_file $datadir/test$tmpext
+dummy data
+EOF
+--write_file $datadir/longfilenamethatvalidatesthatthiswillgetdeleted$tmpext
+dummy data
+EOF
+
+# Show the files exists
+--list_files $datadir *$tmpext
+
+# Now restart the server and make sure it automatically removes this test file
+--source include/restart_mysqld.inc
+
+# Show the files do not exist
+--list_files $datadir *$tmpext
+
+# Cleanup
+disconnect other;
+DROP TABLE t1, t2, t3;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
index 283cedcb6bb..43b1d3e8415 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test
@@ -4,121 +4,8 @@
# This test requires ~2.3G of disk space
--source include/big_test.inc
---disable_warnings
-DROP TABLE IF EXISTS t1, t2, t3;
---enable_warnings
+--let pk_cf=cf1
+--let data_order_desc=0
-# Create a table with a primary key and one secondary key as well as one
-# more column
-CREATE TABLE t1(pk CHAR(5) PRIMARY KEY, a char(30), b char(30), key(a)) COLLATE 'latin1_bin';
-
-# Create a second identical table to validate that bulk loading different
-# tables in the same session works
-CREATE TABLE t2(pk CHAR(5) PRIMARY KEY, a char(30), b char(30), key(a)) COLLATE 'latin1_bin';
-
-# Create a third table using partitions to validate that bulk loading works
-# across a partitioned table
-CREATE TABLE t3(pk CHAR(5) PRIMARY KEY, a char(30), b char(30), key(a)) COLLATE 'latin1_bin'
- PARTITION BY KEY() PARTITIONS 4;
-
---let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
-
-# Create a text file with data to import into the table.
-# The primary key is in sorted order and the secondary keys are randomly generated
---let ROCKSDB_INFILE = $file
-perl;
-my $fn = $ENV{'ROCKSDB_INFILE'};
-open(my $fh, '>>', $fn) || die "perl open($fn): $!";
-my $max = 5000000;
-my @chars = ("A".."Z", "a".."z", "0".."9");
-my @lowerchars = ("a".."z");
-my @powers_of_26 = (26 * 26 * 26 * 26, 26 * 26 * 26, 26 * 26, 26, 1);
-for (my $ii = 0; $ii < $max; $ii++)
-{
- my $pk;
- my $tmp = $ii;
- foreach (@powers_of_26)
- {
- $pk .= $lowerchars[$tmp / $_];
- $tmp = $tmp % $_;
- }
-
- my $num = int(rand(25)) + 6;
- my $a;
- $a .= $chars[rand(@chars)] for 1..$num;
-
- $num = int(rand(25)) + 6;
- my $b;
- $b .= $chars[rand(@chars)] for 1..$num;
- print $fh "$pk\t$a\t$b\n";
-}
-close($fh);
-EOF
-
---file_exists $file
-
-# Make sure a snapshot held by another user doesn't block the bulk load
-connect (other,localhost,root,,);
-set session transaction isolation level repeatable read;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-start transaction with consistent snapshot;
-select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
-
-connection default;
-set rocksdb_bulk_load=1;
-set rocksdb_bulk_load_size=100000;
---disable_query_log
---echo LOAD DATA INFILE <input_file> INTO TABLE t1;
-eval LOAD DATA INFILE '$file' INTO TABLE t1;
---echo LOAD DATA INFILE <input_file> INTO TABLE t2;
-eval LOAD DATA INFILE '$file' INTO TABLE t2;
---echo LOAD DATA INFILE <input_file> INTO TABLE t3;
-eval LOAD DATA INFILE '$file' INTO TABLE t3;
---enable_query_log
-set rocksdb_bulk_load=0;
-
-# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
-SHOW TABLE STATUS WHERE name LIKE 't%';
-
-ANALYZE TABLE t1, t2, t3;
-
---replace_column 6 # 7 # 8 # 9 #
-SHOW TABLE STATUS WHERE name LIKE 't%';
-
-# Make sure all the data is there.
-select count(pk) from t1;
-select count(a) from t1;
-select count(b) from t1;
-select count(pk) from t2;
-select count(a) from t2;
-select count(b) from t2;
-select count(pk) from t3;
-select count(a) from t3;
-select count(b) from t3;
-
-# Create a dummy file with a bulk load extesion. It should be removed when
-# the server starts
---let $tmpext = .bulk_load.tmp
---let $MYSQLD_DATADIR= `SELECT @@datadir`
---let $datadir = $MYSQLD_DATADIR/.rocksdb
---write_file $datadir/test$tmpext
-dummy data
-EOF
---write_file $datadir/longfilenamethatvalidatesthatthiswillgetdeleted$tmpext
-dummy data
-EOF
-
-# Show the files exists
---list_files $datadir *$tmpext
-
-# Now restart the server and make sure it automatically removes this test file
---source include/restart_mysqld.inc
-
-# Show the files do not exist
---list_files $datadir *$tmpext
-
-# Cleanup
-disconnect other;
-DROP TABLE t1, t2, t3;
+--source bulk_load.inc
--remove_file $file
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
new file mode 100644
index 00000000000..b80361f325c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test
@@ -0,0 +1,65 @@
+--source include/have_rocksdb.inc
+
+### Bulk load ###
+CREATE TABLE t1(pk INT, PRIMARY KEY(pk));
+
+# Make sure we get an error with out of order keys during bulk load
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(10);
+INSERT INTO t1 VALUES(11);
+--error ER_KEYS_OUT_OF_ORDER
+INSERT INTO t1 VALUES(9);
+SET rocksdb_bulk_load=0;
+
+# Make sure only 10 and 11 got into the table
+SELECT * FROM t1;
+
+# Make sure we get an error with overlapping data
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(1);
+INSERT INTO t1 VALUES(2);
+INSERT INTO t1 VALUES(20);
+INSERT INTO t1 VALUES(21);
+
+# This last crashes the server (intentionally) because we can't return any
+# error information from a SET <variable>=<value>
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 2013
+SET rocksdb_bulk_load=0;
+
+--exec grep "RocksDB: Error 197 finalizing last SST file while setting bulk loading variable" $MYSQLTEST_VARDIR/log/mysqld.1.err | cut -d] -f2
+--exec echo "" >$MYSQLTEST_VARDIR/log/mysqld.1.err
+
+# restart the crashed server
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+
+# Make sure the error exists in the .err log and then restart the server
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+TRUNCATE TABLE t1;
+
+### Bulk load with unsorted PKs ###
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+# We should not get an error with out of order PKs
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(100);
+INSERT INTO t1 VALUES(101);
+INSERT INTO t1 VALUES(99);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+TRUNCATE TABLE t1;
+
+# We should get an error with duplicate PKs in the same bulk load
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES(201);
+INSERT INTO t1 VALUES(200);
+INSERT INTO t1 VALUES(202);
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES(201);
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+
+SET rocksdb_bulk_load_allow_unsorted=DEFAULT;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
new file mode 100644
index 00000000000..5aec6ff5e99
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=rev:cf1
+--let data_order_desc=0
+
+--source bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
new file mode 100644
index 00000000000..83006f9e446
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=rev:cf1
+--let data_order_desc=1
+
+--source bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
new file mode 100644
index 00000000000..df7b6a7c821
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=cf1
+--let data_order_desc=1
+
+--source bulk_load.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
new file mode 100644
index 00000000000..9cd3aaeafac
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test
@@ -0,0 +1,136 @@
+--source include/have_rocksdb.inc
+
+--let pk_cf=cf1
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+SET rocksdb_bulk_load_size=3;
+SET rocksdb_bulk_load_allow_unsorted=1;
+
+### Test individual INSERTs ###
+
+# A table with only a PK won't have rows until the bulk load is finished
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# A table with a PK and a SK shows rows immediately
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b));
+SET rocksdb_bulk_load=1;
+--disable_query_log
+let $sign = 1;
+let $max = 5;
+let $i = 1;
+while ($i <= $max) {
+ let $a = 1 + $sign * $i;
+ let $b = 1 - $sign * $i;
+ let $sign = -$sign;
+ let $insert = INSERT INTO t1 VALUES ($a, $b);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+DROP TABLE t1;
+
+# Inserting into another table finishes bulk load to the previous table
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+
+SET rocksdb_bulk_load=1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t2 VALUES (1,1);
+SELECT * FROM t1;
+INSERT INTO t1 VALUES (2,2);
+SELECT * FROM t2;
+SELECT * FROM t1;
+SET rocksdb_bulk_load=0;
+SELECT * FROM t1;
+DROP TABLE t1, t2;
+
+### Test bulk load from a file ###
+eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf");
+eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf");
+eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf")
+ PARTITION BY KEY() PARTITIONS 4;
+
+--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")`
+# Create a text file with data to import into the table.
+# PK and SK are not in any order
+--let ROCKSDB_INFILE = $file
+perl;
+my $fn = $ENV{'ROCKSDB_INFILE'};
+open(my $fh, '>', $fn) || die "perl open($fn): $!";
+my $max = 5000000;
+my $sign = 1;
+for (my $ii = 0; $ii < $max; $ii++)
+{
+ my $a = 1 + $sign * $ii;
+ my $b = 1 - $sign * $ii;
+ print $fh "$a\t$b\n";
+}
+close($fh);
+EOF
+--file_exists $file
+
+# Make sure a snapshot held by another user doesn't block the bulk load
+connect (other,localhost,root,,);
+set session transaction isolation level repeatable read;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+start transaction with consistent snapshot;
+select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
+
+connection default;
+set rocksdb_bulk_load=1;
+set rocksdb_bulk_load_size=100000;
+--disable_query_log
+--echo LOAD DATA INFILE <input_file> INTO TABLE t1;
+eval LOAD DATA INFILE '$file' INTO TABLE t1;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t2;
+eval LOAD DATA INFILE '$file' INTO TABLE t2;
+--echo LOAD DATA INFILE <input_file> INTO TABLE t3;
+eval LOAD DATA INFILE '$file' INTO TABLE t3;
+--enable_query_log
+set rocksdb_bulk_load=0;
+
+--remove_file $file
+
+# Make sure row count index stats are correct
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+ANALYZE TABLE t1, t2, t3;
+
+--replace_column 6 # 7 # 8 # 9 #
+SHOW TABLE STATUS WHERE name LIKE 't%';
+
+# Make sure all the data is there.
+select count(a) from t1;
+select count(b) from t1;
+select count(a) from t2;
+select count(b) from t2;
+select count(a) from t3;
+select count(b) from t3;
+
+DROP TABLE t1, t2, t3;
+SET rocksdb_bulk_load_allow_unsorted=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
new file mode 100644
index 00000000000..067a91d5d8b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test
@@ -0,0 +1,8 @@
+--source include/have_rocksdb.inc
+--source include/have_nodebug.inc
+
+# Cannot change unsorted input preference during bulk load
+SET rocksdb_bulk_load=1;
+--error ER_ERROR_WHEN_EXECUTING_COMMAND
+SET rocksdb_bulk_load_allow_unsorted=1;
+SET rocksdb_bulk_load=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
new file mode 100644
index 00000000000..f0361707355
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS stats_test_table;
+--enable_warnings
+
+# Create the table
+CREATE TABLE stats_test_table (a INT, b INT, PRIMARY KEY (a)) ENGINE=ROCKSDB;
+
+# Ensure appropriate perf_context_level is set
+SET GLOBAL rocksdb_perf_context_level=3;
+
+# Insert some values
+INSERT INTO stats_test_table VALUES (7,1);
+INSERT INTO stats_test_table VALUES (2,2);
+
+# Verify the bytes written are updated in the table stats
+SELECT io_write_bytes > 0 FROM INFORMATION_SCHEMA.TABLE_STATISTICS WHERE TABLE_NAME = "stats_test_table";
+
+# Cleanup
+DROP TABLE stats_test_table;
+SET GLOBAL rocksdb_perf_context_level=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/collation.test b/storage/rocksdb/mysql-test/rocksdb/t/collation.test
index 29c2c2886b5..7152925dca4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/collation.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/collation.test
@@ -14,9 +14,9 @@ CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), va
DROP TABLE t1;
# ci indexed column is not allowed
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value)) engine=rocksdb charset utf8;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE t1 (id INT primary key, value varchar(50), value2 varbinary(50), value3 text, index(value3(50))) engine=rocksdb charset utf8;
# ci indexed column with rocksdb_strict_collation_check=OFF is allowed.
SET GLOBAL rocksdb_strict_collation_check=0;
@@ -44,20 +44,20 @@ DROP TABLE t1;
SET GLOBAL rocksdb_strict_collation_exceptions=t1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE t2 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test regex for exception list
SET GLOBAL rocksdb_strict_collation_exceptions="t.*";
CREATE TABLE t123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t123;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE s123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
SET GLOBAL rocksdb_strict_collation_exceptions=".t.*";
CREATE TABLE xt123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE xt123;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE t123 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list with commas
@@ -66,7 +66,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list with vertical bar
@@ -75,7 +75,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list and extra comma at the front
@@ -84,7 +84,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list and extra vertical bar at the front
@@ -93,7 +93,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list and extra comma in the middle
@@ -102,7 +102,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list and extra vertical bar in the middle
@@ -111,7 +111,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list and extra comma at the end
@@ -120,7 +120,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list and extra vertical bar at the end
@@ -129,7 +129,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test multiple entries in the list and tons of commas and vertical bars just for the fun of it
@@ -138,7 +138,7 @@ CREATE TABLE s1 (id INT primary key, value varchar(50), index(value)) engine=roc
DROP TABLE s1;
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE u1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb charset utf8;
# test allowing alters to create temporary tables
@@ -146,10 +146,10 @@ SET GLOBAL rocksdb_strict_collation_exceptions='t1';
CREATE TABLE t1 (id INT primary key, value varchar(50), index(value)) engine=rocksdb;
ALTER TABLE t1 AUTO_INCREMENT=1;
DROP TABLE t1;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE t2 (id INT primary key, value varchar(50), index(value)) engine=rocksdb;
CREATE TABLE t2 (id INT primary key, value varchar(50)) engine=rocksdb;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
ALTER TABLE t2 ADD INDEX(value);
DROP TABLE t2;
@@ -159,12 +159,11 @@ SET GLOBAL rocksdb_strict_collation_exceptions="[a-b";
let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err;
let SEARCH_PATTERN=Invalid pattern in strict_collation_exceptions: \[a-b;
source include/search_pattern_in_file.inc;
---error ER_UNKNOWN_ERROR
CREATE TABLE a (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
SET GLOBAL rocksdb_strict_collation_exceptions="[a-b]";
CREATE TABLE a (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
CREATE TABLE b (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE c (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE a, b;
@@ -173,11 +172,11 @@ call mtr.add_suppression("Invalid pattern in strict_collation_exceptions:");
SET GLOBAL rocksdb_strict_collation_exceptions="abc\\";
let SEARCH_PATTERN=Invalid pattern in strict_collation_exceptions: abc;
source include/search_pattern_in_file.inc;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE abc (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
SET GLOBAL rocksdb_strict_collation_exceptions="abc";
CREATE TABLE abc (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
---error ER_UNKNOWN_ERROR
+--error ER_UNSUPPORTED_COLLATION
CREATE TABLE abcd (id INT PRIMARY KEY, value varchar(50), index(value)) engine=rocksdb charset utf8;
DROP TABLE abc;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test b/storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test
index 7f741e286b1..334b1bb2750 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test
@@ -1,3 +1,5 @@
+--source include/have_rocksdb.inc
+
CREATE TABLE `r1.lol` (
`c1` int(10) NOT NULL DEFAULT '0',
`c2` int(11) NOT NULL DEFAULT '0',
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc b/storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc
index be01338cb85..ca7510b1253 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc
@@ -23,7 +23,7 @@ eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation;
# no external inserts should be visible to the transaction.
# But it should only work this way for REPEATABLE-READ and SERIALIZABLE
---error 0,ER_UNKNOWN_ERROR
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--echo ERROR: $mysql_errno
@@ -38,7 +38,7 @@ connection con2;
select * from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS';
connection con1;
---error 0,ER_UNKNOWN_ERROR
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--echo ERROR: $mysql_errno
@@ -79,7 +79,7 @@ SELECT * FROM r1; # 5
COMMIT;
SELECT * FROM r1; # 6
---error 0,ER_UNKNOWN_ERROR
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--echo ERROR: $mysql_errno
@@ -97,7 +97,7 @@ SELECT * FROM r1; # 6
COMMIT;
SELECT * FROM r1; # 8
---error 0,ER_UNKNOWN_ERROR
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--echo ERROR: $mysql_errno
@@ -105,7 +105,7 @@ connection con2;
INSERT INTO r1 values (9,9,9);
connection con1;
---error 0,ER_UNKNOWN_ERROR
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--echo ERROR: $mysql_errno
@@ -115,11 +115,11 @@ INSERT INTO r1 values (10,10,10);
connection con1;
SELECT * FROM r1; # 9
---error 0,ER_UNKNOWN_ERROR
+--error 0,ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--echo ERROR: $mysql_errno
# Succeeds with Read Committed, Fails with Repeatable Read
---error 0,ER_UNKNOWN_ERROR
+--error 0,ER_UPDATES_WITH_CONSISTENT_SNAPSHOT
INSERT INTO r1 values (11,11,11);
--echo ERROR: $mysql_errno
SELECT * FROM r1; # self changes should be visible
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test b/storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test
index e1f38dd7067..eca0830c4b4 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test
@@ -24,7 +24,7 @@ select * from t1;
set @tmp1=@@rocksdb_verify_row_debug_checksums;
set rocksdb_verify_row_debug_checksums=1;
set session debug_dbug= "+d,myrocks_simulate_bad_row_read1";
---error ER_GET_ERRNO
+--error ER_GET_ERRMSG
select * from t1 where pk=1;
set session debug_dbug= "-d,myrocks_simulate_bad_row_read1";
set rocksdb_verify_row_debug_checksums=@tmp1;
@@ -32,12 +32,12 @@ set rocksdb_verify_row_debug_checksums=@tmp1;
select * from t1 where pk=1;
set session debug_dbug= "+d,myrocks_simulate_bad_row_read2";
---error ER_GET_ERRNO
+--error ER_GET_ERRMSG
select * from t1 where pk=1;
set session debug_dbug= "-d,myrocks_simulate_bad_row_read2";
set session debug_dbug= "+d,myrocks_simulate_bad_row_read3";
---error ER_GET_ERRNO
+--error ER_GET_ERRMSG
select * from t1 where pk=1;
set session debug_dbug= "-d,myrocks_simulate_bad_row_read3";
@@ -58,7 +58,7 @@ create table t2 (
insert into t2 values ('ABCD',1);
select * from t2;
set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1";
---error ER_GET_ERRNO
+--error ER_GET_ERRMSG
select * from t2;
set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1";
@@ -73,7 +73,7 @@ insert into t2 values ('ABCD',1);
select * from t2;
set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1";
---error ER_GET_ERRNO
+--error ER_GET_ERRMSG
select * from t2;
set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1";
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
new file mode 100644
index 00000000000..344b10bb042
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test
@@ -0,0 +1,79 @@
+# Disabled (requiring both have_debug and have_nodebug below always skips this test) until the covered unpack_info format is enabled in prod
+--source include/have_debug.inc
+--source include/have_nodebug.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+#
+# Normal case
+#
+
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+#
+# Create same table with older format to test compatibility
+#
+
+set session debug= '+d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_NO_COVERED_BITMAP_FORMAT';
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--let $prefix_index_check_title= No longer eligible for optimization since no covered bitmap was stored.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
new file mode 100644
index 00000000000..01bb5cfa4ed
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test
@@ -0,0 +1,18 @@
+# Test high priority DDL
+# There are two modes of high priority DDL:
+# 1. Through the system variable high_priority_ddl
+# 2. Through syntax: CREATE/ALTER/DROP/OPTIMIZE HIGH_PRIORITY ...
+
+--source include/have_rocksdb.inc
+
+--echo ##
+--echo ## Using the system variable high_priority_ddl"
+--echo ##
+--let $use_sys_var = 1;
+--source include/ddl_high_priority.inc
+
+--echo ##
+--echo ## Using HIGH_PRIORITY syntax
+--echo ##
+--let $use_sys_var = 0;
+--source include/ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test
new file mode 100644
index 00000000000..a9b30a4273a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test
@@ -0,0 +1,3 @@
+let $engine=rocksdb;
+
+--source include/deadlock_stats.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
new file mode 100644
index 00000000000..d2abcb3b63b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test
@@ -0,0 +1,153 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+let $engine = rocksdb;
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+eval create table t (i int primary key) engine=$engine;
+insert into t values (1), (2), (3);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/
+show engine rocksdb transaction status;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTIONID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/
+show engine rocksdb transaction status;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/delete.test b/storage/rocksdb/mysql-test/rocksdb/t/delete.test
index b1654e606a5..c829dd9da3b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/delete.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/delete.test
@@ -90,9 +90,9 @@ DELETE FROM t1 WHERE a <= 4 ORDER BY b DESC LIMIT 1;
SAVEPOINT spt1;
DELETE FROM t1;
INSERT INTO t1 (a,b) VALUES (1,'a');
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_TO_SAVEPOINT
ROLLBACK TO SAVEPOINT spt1;
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
COMMIT;
--sorted_result
SELECT a,b FROM t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
index f53a6050e89..f40e9db55b2 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt
@@ -1,3 +1,3 @@
---rocksdb_max_background_compactions=8
--rocksdb_max_subcompactions=1
--rocksdb_default_cf_options=write_buffer_size=512k;target_file_size_base=512k;level0_file_num_compaction_trigger=2;level0_slowdown_writes_trigger=-1;level0_stop_writes_trigger=1000;max_bytes_for_level_base=1m
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
index 0d48ae461ca..b62d3fbcb2d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table.test
@@ -10,6 +10,10 @@ DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+call mtr.add_suppression("LibRocksDB");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
@@ -107,9 +111,8 @@ let $wait_condition = select count(*) = 0
where TYPE = 'DDL_DROP_INDEX_ONGOING';
--source include/wait_condition.inc
-# Get list of all indices needing to be dropped
-# Check total compacted-away rows for all indices
-# Check that all indices have been successfully dropped
+## Upstream has removed the following:
+--disable_parsing
perl;
sub print_array {
@@ -149,6 +152,7 @@ foreach (sort {$a <=> $b} keys %a){
print_array("Begin filtering dropped index+", sort {$a <=> $b} @b);
print_array("Finished filtering dropped index+", sort {$a <=> $b} @c);
EOF
+--enable_parsing
# Cleanup
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
index 1b5f6c14ee1..14d856cc0c5 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test
@@ -11,6 +11,9 @@ DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
@@ -89,6 +92,10 @@ let $max = 1000;
let $table = t5;
--source drop_table_repopulate_table.inc
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
+
perl;
$size+=-s $_ for (<$ENV{MYSQLTEST_VARDIR}/mysqld.1/data/.rocksdb/*.sst>);
$filename= "$ENV{MYSQLTEST_VARDIR}/tmp/size_output";
@@ -101,6 +108,10 @@ drop table t3;
drop table t4;
drop table t5;
+set @@global.rocksdb_compact_cf = 'cf1';
+set @@global.rocksdb_compact_cf = 'rev:cf2';
+set @@global.rocksdb_compact_cf = 'default';
+
let $show_rpl_debug_info= 1; # to force post-failure printout
let $wait_timeout= 300; # Override default 30 seconds with 300.
let $wait_condition = select count(*) = 0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
index 7a643d9a720..1a044384a45 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc
@@ -7,6 +7,9 @@ call mtr.add_suppression("Column family 'rev:cf2' not found");
DROP TABLE IF EXISTS t1;
--enable_warnings
+call mtr.add_suppression("Column family 'cf1' not found");
+call mtr.add_suppression("Column family 'rev:cf2' not found");
+
# Start from clean slate
set global rocksdb_compact_cf = 'cf1';
set global rocksdb_compact_cf = 'rev:cf2';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
index a9a09d44e01..b4a0c9e5e96 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test
@@ -22,17 +22,21 @@ CREATE TABLE t2 (id1 INT, id2 INT, id3 INT,
DROP TABLE t1;
DROP TABLE t2;
+set global rocksdb_large_prefix=1;
CREATE TABLE t1 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1)) ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
+set global rocksdb_large_prefix=1;
CREATE TABLE t2 (id1 varchar(128) CHARACTER SET latin1 COLLATE latin1_bin,
id2 varchar(256) CHARACTER SET utf8 COLLATE utf8_bin,
id3 varchar(200) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
PRIMARY KEY (id1, id2, id3),
UNIQUE KEY (id3, id1) COMMENT 'rev:cf') ENGINE=ROCKSDB;
+set global rocksdb_large_prefix=DEFAULT;
--source include/dup_key_update.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
new file mode 100644
index 00000000000..15f6c36f52d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test
@@ -0,0 +1,120 @@
+# Disable until covered unpack_info format enabled in prod
+--source include/have_debug.inc
+--source include/have_nodebug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ id INT,
+ fake_id INT,
+ bigfield VARCHAR(4096),
+ PRIMARY KEY (id),
+ KEY bf (bigfield(32)),
+ KEY fid (fake_id, bigfield(32))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES (1, 1001, REPEAT('a', 1)),
+ (8, 1008, REPEAT('b', 8)),
+ (24, 1024, REPEAT('c', 24)),
+ (31, 1031, REPEAT('d', 31)),
+ (32, 1032, REPEAT('x', 32)),
+ (33, 1033, REPEAT('y', 33)),
+ (128, 1128, REPEAT('z', 128));
+
+SELECT * FROM t1;
+
+--disable_query_log
+
+--let $prefix_index_check_title= Baseline sanity check
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT "no-op query"
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('d', 31)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization, access via fake_id only
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1031
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization, access via fake_id of big row.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(fid) WHERE fake_id = 1033
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+# The secondary_index_read call isn't covered because the next record in the
+# index has a bigfield value of length 33, so only one of two lookups are
+# covered here.
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 1
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('x', 32)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('y', 33)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('b', 8)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 2
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('c', 24)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Not eligible for optimization.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT id, bigfield FROM t1 FORCE INDEX(bf) WHERE bigfield = REPEAT('z', 128)
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
+
+--echo #
+--echo # Test that multi-byte charsets are handled correctly
+--echo #
+
+SET NAMES utf8;
+
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b VARCHAR(30) CHARACTER SET utf8 collate utf8_bin,
+ KEY k (b(2))
+) ENGINE=rocksdb;
+
+INSERT INTO t1 VALUES
+ (1, 'a'),
+ (2, 'cc'),
+ (3, 'ŽŽ'),
+ (4, 'žžžž');
+
+--let $prefix_index_check_title= Charset record obviously shorter than the prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "a"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "cc"
+--let $prefix_index_check_read_avoided_delta= 2
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record with glyphs shorter than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "ŽŽ"
+--let $prefix_index_check_read_avoided_delta= 1
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+--let $prefix_index_check_title= Charset record longer than prefix
+--let $prefix_index_check_query= SELECT * FROM t1 FORCE INDEX(k) WHERE b = "žžžž"
+--let $prefix_index_check_read_avoided_delta= 0
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test b/storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test
index bd8071b1b5e..675a337c24d 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test
@@ -1,3 +1,5 @@
+--source include/have_rocksdb.inc
+
--disable_warnings
DROP TABLE IF EXISTS t1, t2;
--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index.inc b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
index 6b4e4ff233b..8b000a255b3 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index.inc
@@ -119,3 +119,37 @@ DROP TABLE t1;
--enable_parsing
+#
+# Test index prefix length limits.
+#
+set global rocksdb_large_prefix=0;
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(1024),
+ KEY (a(768))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+# Should display warning
+CREATE TABLE t1 (
+ a BLOB(4096),
+ KEY (a(3073))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt
new file mode 100644
index 00000000000..c07b063f07c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=off --binlog_format=row --log-bin
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
new file mode 100644
index 00000000000..abf8d71911b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test
@@ -0,0 +1,109 @@
+--source include/have_rocksdb.inc
+
+#
+# index_merge_rocksdb test copied over from index_merge_ror.inc
+#
+# Triggers issue # https://github.com/facebook/mysql-5.6/issues/604
+
+CREATE TABLE t1
+(
+ /* fields/keys for row retrieval tests */
+ key1 INT,
+ key2 INT,
+ key3 INT,
+ key4 INT,
+
+ /* make rows much bigger then keys */
+ filler1 CHAR(200),
+
+ KEY(key1),
+ KEY(key2)
+) ENGINE=ROCKSDB;
+
+# fill table
+CREATE TABLE t0 AS SELECT * FROM t1;
+--disable_query_log
+--echo # Printing of many insert into t0 values (....) disabled.
+let $cnt=100;
+while ($cnt)
+{
+ eval INSERT INTO t0 VALUES (0, 0, 0, 0, 'data1');
+ dec $cnt;
+}
+
+--echo # Printing of many insert into t1 select .... from t0 disabled.
+let $1=4;
+while ($1)
+{
+ let $2=4;
+ while ($2)
+ {
+ let $3=4;
+ while ($3)
+ {
+ eval INSERT INTO t1 SELECT key1, key2, key3, key4, filler1 FROM t0;
+ dec $3;
+ }
+ dec $2;
+ }
+ dec $1;
+}
+
+--echo # Printing of many insert into t1 (...) values (....) disabled.
+# Row retrieval tests
+# -1 is used for values 'out of any range we are using'
+# insert enough rows for index intersection to be used for (key1,key2)
+INSERT INTO t1 (key1, key2, key3, key4, filler1) VALUES (100, 100, 100, 100,'key1-key2-key3-key4');
+let $cnt=400;
+while ($cnt)
+{
+ eval INSERT INTO t1 (key1, key2, key3, key4, filler1) VALUES (100, -1, 100, -1,'key1-key3');
+ dec $cnt;
+}
+let $cnt=400;
+while ($cnt)
+{
+ eval INSERT INTO t1 (key1, key2, key3, key4, filler1) VALUES (-1, 100, -1, 100,'key2-key4');
+ dec $cnt;
+}
+--enable_query_log
+
+SELECT COUNT(*) FROM t1;
+
+-- disable_query_log
+-- disable_result_log
+ANALYZE TABLE t1;
+-- enable_result_log
+-- enable_query_log
+
+SET GLOBAL rocksdb_force_flush_memtable_now = 1;
+
+--replace_column 9 #
+EXPLAIN UPDATE t1 SET filler1='to be deleted' WHERE key1=100 AND key2=100;
+UPDATE t1 SET filler1='to be deleted' WHERE key1=100 and key2=100;
+
+DROP TABLE t0, t1;
+
+# Issue624 - MyRocks executes index_merge query plan incorrectly
+create table t1 (key1 int, key2 int, key3 int, key (key1), key (key2), key(key3)) engine=rocksdb;
+insert into t1 values (1, 100, 100), (1, 200, 200), (1, 300, 300);
+--disable_query_log
+let $i = 1;
+while ($i <= 1000) {
+ let $insert = INSERT INTO t1 VALUES(1000,1000,1000);
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+analyze table t1;
+set global rocksdb_force_flush_memtable_now=1;
+
+--replace_column 9 #
+explain select * from t1 where key1 = 1;
+--replace_column 9 #
+explain select key1,key2 from t1 where key1 = 1 or key2 = 1;
+select * from t1 where key1 = 1;
+select key1,key2 from t1 where key1 = 1 or key2 = 1;
+
+drop table t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt
new file mode 100644
index 00000000000..c07b063f07c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt
@@ -0,0 +1 @@
+--rocksdb_strict_collation_check=off --binlog_format=row --log-bin
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
new file mode 100644
index 00000000000..a4d26cf7739
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test
@@ -0,0 +1,70 @@
+# Skiping this test from Valgrind execution as per Bug-14627884
+--source include/not_valgrind.inc
+# Adding big test option for this test.
+--source include/big_test.inc
+
+# t/index_merge_innodb.test
+#
+# Index merge tests
+#
+# Last update:
+# 2006-08-07 ML test refactored (MySQL 5.1)
+# Main code of several index_merge tests
+# -> include/index_merge*.inc
+# wrapper t/index_merge_innodb.test sources now several
+# include/index_merge*.inc files
+#
+
+--source include/have_rocksdb.inc
+let $engine_type= RocksDB;
+# skipping because too unstable in MyRocks
+let $skip_ror_EXPLAIN_for_MyRocks = 1;
+let $random_rows_in_EXPLAIN = 1;
+let $sorted_result = 1;
+# According to Oracle: "InnoDB's estimate for the index cardinality
+# depends on a pseudo random number generator (it picks up random
+# pages to sample). After an optimization that was made in r2625 two
+# EXPLAINs started returning a different number of rows (3 instead of
+# 4)", so:
+let $index_merge_random_rows_in_EXPLAIN = 1;
+# RocksDB does not support Merge tables (affects include/index_merge1.inc)
+let $merge_table_support= 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge1.inc
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge_ror.inc
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge2.inc
+
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge_2sweeps.inc
+set global rocksdb_force_flush_memtable_now=1;
+--source include/index_merge_ror_cpk.inc
+
+set global rocksdb_force_flush_memtable_now=1;
+--echo #
+--echo # Bug#11747423 32254: INDEX MERGE USED UNNECESSARILY
+--echo #
+CREATE TABLE t1 (
+ id INT NOT NULL PRIMARY KEY,
+ id2 INT NOT NULL,
+ id3 INT NOT NULL,
+ KEY (id2),
+ KEY (id3),
+ KEY covering_index (id2,id3)
+) ENGINE=RocksDB;
+
+INSERT INTO t1 VALUES (0, 0, 0), (1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4), (5, 5, 5), (6, 6, 6), (7, 7, 7);
+INSERT INTO t1 SELECT id + 8, id2 + 8, id3 +8 FROM t1;
+INSERT INTO t1 SELECT id + 16, 7, 0 FROM t1;
+
+-- disable_query_log
+-- disable_result_log
+analyze table t1;
+-- enable_result_log
+-- enable_query_log
+
+EXPLAIN SELECT SQL_NO_CACHE count(*) FROM t1 WHERE id2=7 AND id3=0;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
index 3abd2dd05fe..5dcfbaa8d3b 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/index_primary.test
@@ -62,3 +62,35 @@ ALTER TABLE t1 ADD CONSTRAINT PRIMARY KEY pk (a);
SHOW KEYS IN t1;
DROP TABLE t1;
+#
+# Test index prefix length limits.
+#
+set global rocksdb_large_prefix=0;
+
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(767))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(1024),
+ PRIMARY KEY (a(768))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=1;
+
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3072))
+) ENGINE=rocksdb;
+DROP TABLE t1;
+
+--error ER_TOO_LONG_KEY
+CREATE TABLE t1 (
+ a BLOB(4096),
+ PRIMARY KEY (a(3073))
+) ENGINE=rocksdb;
+
+set global rocksdb_large_prefix=DEFAULT;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
index b1adc16f6a2..c8d9b538529 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/information_schema.test
@@ -46,12 +46,10 @@ CREATE TABLE t2 (
b int,
c int,
d int,
- e int,
PRIMARY KEY (a) COMMENT "cf_a",
KEY (b) COMMENT "cf_b",
KEY (c) COMMENT "cf_c",
- KEY (d) COMMENT "$per_index_cf",
- KEY (e) COMMENT "rev:cf_d") ENGINE=ROCKSDB;
+ KEY (d) COMMENT "rev:cf_d") ENGINE=ROCKSDB;
select * from INFORMATION_SCHEMA.ROCKSDB_GLOBAL_INFO where TYPE = 'CF_FLAGS';
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/insert.test b/storage/rocksdb/mysql-test/rocksdb/t/insert.test
index 14cfe1cadb8..aebd3f76e87 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/insert.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/insert.test
@@ -86,10 +86,10 @@ INSERT t1 (a) VALUE (10),(20);
SAVEPOINT spt1;
INSERT INTO t1 SET a = 11, b = 'f';
INSERT t1 SET b = DEFAULT;
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_TO_SAVEPOINT
ROLLBACK TO SAVEPOINT spt1;
INSERT INTO t1 (b,a) VALUES ('test1',10);
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
COMMIT;
--sorted_result
SELECT a,b FROM t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test b/storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test
new file mode 100644
index 00000000000..0997bde3f49
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test
@@ -0,0 +1,80 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (
+ id INT,
+ val1 INT,
+ val2 INT,
+ PRIMARY KEY (id)
+) ENGINE=rocksdb;
+
+#
+# DB operations without Transaction , every count should be 0
+# so there's no count related output
+#
+INSERT INTO t1 VALUES(1,1,1),(2,1,2);
+SELECT * FROM t1;
+UPDATE t1 SET val1=2 WHERE id=2;
+SELECT * FROM t1;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+#
+# DB operations with Tansaction, insert_count, update_count, delete_count
+# and total write_count should be printed
+# Cases: rollback, commit transaction
+#
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t1 VALUES(20,1,1),(30,30,30);
+SELECT * FROM t1;
+UPDATE t1 SET val1=20, val2=20 WHERE id=20;
+SELECT * FROM t1;
+DELETE FROM t1 WHERE id=30;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+ROLLBACK;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+START TRANSACTION;
+INSERT INTO t1 VALUES(40,40,40);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+COMMIT;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+
+SET AUTOCOMMIT=1;
+DROP TABLE t1;
+
+#
+# Secondary Key Tests
+#
+--disable_warnings
+DROP TABLE IF EXISTS t2;
+--enable_warnings
+
+CREATE TABLE t2 (
+ id1 INT,
+ id2 INT,
+ value INT,
+ PRIMARY KEY (id1),
+ KEY (id2)
+) ENGINE=rocksdb;
+
+SET AUTOCOMMIT=0;
+START TRANSACTION;
+INSERT INTO t2 VALUES(1,2,0),(10,20,30);
+UPDATE t2 SET value=3 WHERE id2=2;
+DELETE FROM t2 WHERE id1=10;
+
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /(ACTIVE) [0-9]+ /\1 NUM / /(thread id) [0-9]+/\1 TID/ /0x[0-9a-f]+/PTR/ /(query id) [0-9]+/\1 QID/ /(root) [a-z ]+/\1 ACTION/
+SHOW ENGINE rocksdb TRANSACTION STATUS;
+ROLLBACK;
+SET AUTOCOMMIT=1;
+DROP TABLE t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue255.test b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
new file mode 100644
index 00000000000..4f1927d366c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue255.test
@@ -0,0 +1,19 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
+
+INSERT INTO t1 VALUES (5);
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+INSERT INTO t1 VALUES ('538647864786478647864');
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+--error ER_DUP_ENTRY
+INSERT INTO t1 VALUES ();
+SELECT * FROM t1;
+--replace_column 3 # 6 # 7 # 8 # 9 # 10 #
+SHOW TABLE STATUS LIKE 't1';
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/issue314.test b/storage/rocksdb/mysql-test/rocksdb/t/issue314.test
index 2059eef2195..822969a380f 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/issue314.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/issue314.test
@@ -8,7 +8,7 @@ SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
CREATE TABLE t1(a int);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
INSERT INTO t1 VALUES(1);
---error ER_UNKNOWN_ERROR
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
select * from t1;
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
select * from t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc
index 5d3678f5f27..1d83598a282 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc
@@ -98,7 +98,7 @@ EOF
--replace_result $datadir <DATADIR>
if ($skip_unique_check == 1)
{
- --error ER_UNKNOWN_ERROR
+ --error ER_ON_DUPLICATE_DISABLED
eval
LOAD DATA INFILE '$datadir/se_replacedata.dat' REPLACE INTO TABLE t1;
}
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
new file mode 100644
index 00000000000..f1777ea3e93
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test
@@ -0,0 +1,34 @@
+create table t (a int primary key) engine=rocksdb;
+
+begin;
+insert into t values (0);
+
+--source include/count_sessions.inc
+--connect (con1,localhost,root,,)
+--connection con1
+set @@rocksdb_lock_wait_timeout=1;
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+begin;
+
+--connect (con2,localhost,root,,)
+--connection con2
+set @@rocksdb_lock_wait_timeout=1;
+begin;
+
+--connection con1
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t values(0);
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+
+--connection con2
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t values(0);
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+select ROW_LOCK_WAIT_TIMEOUTS from information_schema.table_statistics where table_name="t";
+
+--disconnect con1
+--connection default
+--disconnect con2
+drop table t;
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
new file mode 100644
index 00000000000..de6b94de8e4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test
@@ -0,0 +1,49 @@
+# Disable until covered unpack_info format enabled in prod
+--source include/have_debug.inc
+--source include/have_nodebug.inc
+
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS T1;
+--enable_warnings
+
+CREATE TABLE T1 (
+ P1 VARCHAR(64), P2 VARCHAR(64), P3 VARCHAR(64), P4 VARCHAR(64),
+ S1 VARCHAR(64), S2 VARCHAR(64), S3 VARCHAR(64), S4 VARCHAR(64),
+ S5 VARCHAR(64), S6 VARCHAR(64), S7 VARCHAR(64), S8 VARCHAR(64),
+ S9 VARCHAR(64), S10 VARCHAR(64), S11 VARCHAR(64), S12 VARCHAR(64),
+ S13 VARCHAR(64), S14 VARCHAR(64), S15 VARCHAR(64), S16 VARCHAR(64),
+ PRIMARY KEY (P1(8), P2(8), P3(8), P4(8)),
+ KEY SK (S1(8), S2(8), S3(8), S4(8),
+ S5(8), S6(8), S7(8), S8(8),
+ S9(8), S10(8), S11(8), S12(8),
+ S13(8), S14(8), S15(8), S16(8))
+) ENGINE=rocksdb;
+
+INSERT INTO T1 VALUES ('1', '2', '3', '4',
+ '5', '6', '7', '8',
+ '9', '10', '11', '12',
+ '13', '14', '15', '16',
+ '17', '18', '19', '20');
+
+SELECT * FROM T1;
+
+--let $prefix_index_check_title= Not eligible for optimization, shorter than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DELETE FROM T1;
+INSERT INTO T1 VALUES ('100000000', '200000000', '300000000', '400000000',
+ '500000000', '600000000', '700000000', '800000000',
+ '900000000', '100000000', '110000000', '120000000',
+ '130000000', '140000000', '150000000', '160000000',
+ '170000000', '180000000', '190000000', '200000000');
+
+--let $prefix_index_check_title= Not eligible for optimization, longer than prefix length.
+--let $prefix_index_check_read_avoided_delta= 0
+--let $prefix_index_check_query= SELECT P4, S2 FROM T1 FORCE INDEX(SK) WHERE S1='5'
+--source suite/rocksdb/include/prefix_index_only_query_check.inc
+
+DROP TABLE T1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt
new file mode 100644
index 00000000000..6c4cea3d4b7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt
@@ -0,0 +1 @@
+$NP_EXAMPLE_LIB_OPT
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test
new file mode 100644
index 00000000000..d4f38a607cf
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test
@@ -0,0 +1,2 @@
+let $engine=rocksdb;
+--source include/native_procedure.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test b/storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test
index beccc8a6b8e..db66da3b0dc 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test
@@ -1,3 +1,4 @@
+--source include/have_rocksdb.inc
let $engine=rocksdb;
--source include/loose_index_scans.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt
new file mode 100644
index 00000000000..ca7e3636645
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=write_buffer_size=64k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;};prefix_extractor=capped:24;disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test
new file mode 100644
index 00000000000..13d76bb5a3f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test
@@ -0,0 +1,96 @@
+--source include/have_rocksdb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (id1 BIGINT, id2 BIGINT, id3 BIGINT, id4 BIGINT, PRIMARY KEY (id1, id2, id3, id4) comment 'cf1') ENGINE=rocksdb collate latin1_bin;
+--disable_query_log
+let $i = 1;
+while ($i <= 100) {
+ let $insert = INSERT INTO t1 VALUES(1, $i, $i, $i);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+set global rocksdb_force_flush_memtable_now = 1;
+
+--echo
+--echo Original Prefix Extractor:
+--echo
+--sorted_result
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
+# BF used (4+8+8+8)
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+--exec echo "" > $MYSQLTEST_VARDIR/log/mysqld.1.err
+--let $_mysqld_option=--rocksdb_override_cf_options=cf1={prefix_extractor=capped:26};
+
+--echo
+--echo Prefix Extractor (after override_cf_options set, should not be changed):
+--echo
+--sorted_result
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
+# This should no longer crash. See https://github.com/facebook/mysql-5.6/issues/641
+--echo
+--echo Restarting with new Prefix Extractor...
+--echo
+--source include/restart_mysqld_with_option.inc
+
+--echo
+--echo Changed Prefix Extractor (after restart):
+--echo
+--sorted_result
+SELECT * FROM information_schema.rocksdb_cf_options WHERE option_type like '%prefix_extractor%';
+
+# Satisfies can_use_bloom_filter (4+8+8+8), but can't use because the old SST
+# files have old prefix extractor
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+# Insert more data into t1, verify it uses new bloom filter
+--disable_query_log
+let $i = 101;
+while ($i <= 200) {
+ let $insert = INSERT INTO t1 VALUES(1, $i, $i, $i);
+ eval $insert;
+ inc $i;
+}
+--enable_query_log
+
+set global rocksdb_force_flush_memtable_now = 1;
+
+# BF used w/ new prefix extractor (4+8+8+8) (still increments once because it
+# needs to check the new SST file, but doesnt increment for SST file with old
+# extractor)
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+# should have 2 sst files, one with old prefix extractor and one with new
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+
+# update some old data, force compaction, verify that new SST files use
+# new bloom filter
+UPDATE t1 SET id1=1,id2 = 30,id3 = 30 WHERE id4 >= 0 AND id4 <=10;
+set global rocksdb_force_flush_memtable_now = 1;
+
+# should have 3 sst files, one with old prefix extractor and two with new
+SELECT COUNT(*) FROM information_schema.rocksdb_index_file_map WHERE COLUMN_FAMILY != 1;
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=1 AND id3=1;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+set global rocksdb_compact_cf='cf1';
+
+# Select the updated, make sure bloom filter is checked now
+select variable_value into @u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+SELECT COUNT(*) FROM t1 WHERE id1=1 AND id2=30 AND id3=30;
+select variable_value-@u from information_schema.global_status where variable_name='rocksdb_bloom_filter_prefix_checked';
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test b/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test
index 56070652618..52f65095d33 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test
@@ -37,7 +37,7 @@ connection con1;
select case when variable_value-@p < 1000 then 'true' else variable_value-@p end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_put';
select case when variable_value-@s < 100 then 'true' else variable_value-@s end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
SELECT * FROM t1;
---error ER_UNKNOWN_ERROR
+--error ER_UPDATES_WITH_CONSISTENT_SNAPSHOT
INSERT INTO t1 values (2, 2);
ROLLBACK;
SELECT * FROM t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
index 87fc2e6f0fb..0089b62e0b9 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test
@@ -1141,7 +1141,7 @@ set @a=-1;
insert into t1 select (@a:=@a+1), 1234 from information_schema.session_variables limit 100;
set @tmp1= @@rocksdb_max_row_locks;
set rocksdb_max_row_locks= 20;
---error ER_GET_ERRMSG
+--error ER_RDB_STATUS_GENERAL
update t1 set a=a+10;
DROP TABLE t1;
@@ -1356,7 +1356,9 @@ DROP TABLE t1;
--echo #
--echo # Issue #17: Automatic per-index column families
+--echo # (Now deprecated)
--echo #
+--error ER_PER_INDEX_CF_DEPRECATED
create table t1 (
id int not null,
key1 int,
@@ -1364,44 +1366,6 @@ create table t1 (
index (key1) comment '$per_index_cf'
) engine=rocksdb;
---echo #Same CF ids with different CF flags
---error ER_UNKNOWN_ERROR
-create table t1_err (
- id int not null,
- key1 int,
- PRIMARY KEY (id),
- index (key1) comment 'test.t1.key1'
-) engine=rocksdb;
-
-create table t1_err (
- id int not null,
- key1 int,
- PRIMARY KEY (id),
- index (key1) comment 'test.t1.key2'
-) engine=rocksdb;
-drop table t1_err;
-
---echo # Unfortunately there is no way to check which column family everything goes to
-insert into t1 values (1,1);
-select * from t1;
---echo # Check that ALTER and RENAME are disallowed
---error ER_NOT_SUPPORTED_YET
-alter table t1 add col2 int;
-
---error ER_NOT_SUPPORTED_YET
-rename table t1 to t2;
-
-drop table t1;
-
---echo # Check detection of typos in \$per_index_cf
---error ER_NOT_SUPPORTED_YET
-create table t1 (
- id int not null,
- key1 int,
- PRIMARY KEY (id),
- index (key1) comment '$per_idnex_cf'
-)engine=rocksdb;
-
--echo #
--echo # Issue #22: SELECT ... FOR UPDATE takes a long time
@@ -1699,7 +1663,9 @@ SET @old_mode = @@sql_mode;
SET sql_mode = 'strict_all_tables';
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(c, b(255))) engine=rocksdb;
drop table t1;
+set global rocksdb_large_prefix=1;
create table t1 (a int, b text, c varchar(400), Primary Key(a), Key(b(1255))) engine=rocksdb;
+set global rocksdb_large_prefix=0;
insert into t1 values (1, '1abcde', '1abcde'), (2, '2abcde', '2abcde'), (3, '3abcde', '3abcde');
select * from t1;
--replace_column 9 #
@@ -1795,9 +1761,9 @@ INSERT INTO t1 VALUES(1, 1);
INSERT INTO t1 VALUES(1, 2);
INSERT INTO t1 VALUES(1, 3);
SELECT * FROM t1;
---error ER_UNKNOWN_ERROR
+--error ER_ON_DUPLICATE_DISABLED
REPLACE INTO t1 VALUES(4, 4);
---error ER_UNKNOWN_ERROR
+--error ER_ON_DUPLICATE_DISABLED
INSERT INTO t1 VALUES(5, 5) ON DUPLICATE KEY UPDATE value=value+1;
TRUNCATE TABLE t1;
SET @save_rocksdb_bulk_load_size= @@rocksdb_bulk_load_size;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test
index 93febbc1319..3ba54dd9c84 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test
@@ -420,6 +420,7 @@ ALTER TABLE t2 ADD KEY (`col3`, `col4`) COMMENT 'custom_p5_cfname=another_cf_for
SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='another_cf_for_p5';
# Verify that correct partition and key are used when searching.
+ANALYZE TABLE t2;
EXPLAIN PARTITIONS SELECT * FROM t2 WHERE col3 = 0x4 AND col2 = 0x34567;
DROP TABLE t2;
@@ -494,3 +495,18 @@ SELECT DISTINCT(cf_name) FROM information_schema.rocksdb_cfstats WHERE cf_name='
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
+
+
+# Test that truncating table actually removes rows.
+CREATE TABLE t1 (
+ a INT NOT NULL,
+ PRIMARY KEY (a) COMMENT 'p1_cfname=foo;'
+) ENGINE=ROCKSDB
+PARTITION BY LIST COLUMNS(a)
+(PARTITION p1 VALUES IN (1) ENGINE = ROCKSDB);
+
+INSERT INTO t1 values (1);
+TRUNCATE TABLE t1;
+SELECT * FROM t1;
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test b/storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test
index 8543ce81de4..c46d8b07637 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test
@@ -1,3 +1,5 @@
+--source include/have_rocksdb.inc
+
--disable_warnings
DROP TABLE IF EXISTS t1, t2;
--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test
index 13325cf2aa1..56d14f92892 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test
@@ -17,9 +17,9 @@ begin;
insert into t1 values (11, 1);
savepoint a;
insert into t1 values (12, 1);
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_TO_SAVEPOINT
rollback to savepoint a;
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
commit;
commit;
select * from t1;
@@ -34,11 +34,11 @@ begin;
insert into t1 values (21, 1);
savepoint a;
insert into t1 values (22, 1);
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_TO_SAVEPOINT
rollback to savepoint a;
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
insert into t1 values (23, 1);
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
commit;
commit;
select * from t1;
@@ -55,9 +55,9 @@ savepoint a;
insert into t1 values (32, 1);
savepoint b;
insert into t1 values (33, 1);
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_TO_SAVEPOINT
rollback to savepoint a;
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
insert into t1 values (34, 1);
rollback;
select * from t1;
@@ -74,9 +74,9 @@ SAVEPOINT A;
select * from t1;
SAVEPOINT A;
insert into t1 values (35, 35);
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_TO_SAVEPOINT
ROLLBACK TO SAVEPOINT A;
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
START TRANSACTION;
select * from t1;
--source include/sync_slave_sql_with_master.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test
index 39a21e67f05..29671308e9c 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test
@@ -12,7 +12,7 @@ connection master;
select @@binlog_format;
create table t1 (pk int primary key) engine=rocksdb;
---error ER_UNKNOWN_ERROR
+--error ER_REQUIRE_ROW_BINLOG_FORMAT
insert into t1 values (1),(2),(3);
set session rocksdb_unsafe_for_binlog=on;
@@ -21,7 +21,7 @@ select * from t1;
delete from t1;
set session rocksdb_unsafe_for_binlog=off;
---error ER_UNKNOWN_ERROR
+--error ER_REQUIRE_ROW_BINLOG_FORMAT
insert into t1 values (1),(2),(3);
set binlog_format=row;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
index d5914745219..16d978c71b7 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test
@@ -2,6 +2,7 @@
call mtr.add_suppression("Did not write failed ");
call mtr.add_suppression("Can't open and lock privilege tables");
+call mtr.add_suppression("Attempt to delete the trigger file");
SET @ORIG_EVENT_SCHEDULER = @@EVENT_SCHEDULER;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test b/storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test
index c8548d96888..c6ebbfa3f01 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test
@@ -1,3 +1,5 @@
+--source include/have_rocksdb.inc
+
##############################################################################
## SKIP LOCKED | NOWAIT are *not* supported for SELECT...FOR UPDATE in RocksDB
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc b/storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc
index 283afd3d5f8..558e2413256 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc
@@ -23,7 +23,7 @@ if (!$succeeds)
{
--disable_result_log
--disable_query_log
- --error ER_UNKNOWN_ERROR
+ --error ER_RDB_STATUS_GENERAL
eval SET GLOBAL ROCKSDB_CREATE_CHECKPOINT = '$checkpoint';
--enable_query_log
--enable_result_log
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
index 914bc4b1002..06f64ddb3fa 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/show_engine.test
@@ -26,8 +26,8 @@ CREATE TABLE t3 (k INT, PRIMARY KEY (k) COMMENT 'cf_t1') ENGINE = ROCKSDB;
CREATE TABLE t4 (l INT, PRIMARY KEY (l) COMMENT 'cf_t4') ENGINE = ROCKSDB
PARTITION BY KEY(l) PARTITIONS 4;
---replace_column 3 #
-SHOW ENGINE rocksdb STATUS;
+SET @save.rocksdb_max_background_jobs= @@global.rocksdb_max_background_jobs;
+SET GLOBAL rocksdb_max_background_jobs= 1;
INSERT INTO t1 VALUES (1), (2), (3);
SELECT COUNT(*) FROM t1;
@@ -38,6 +38,11 @@ SELECT COUNT(*) FROM t2;
INSERT INTO t4 VALUES (1), (2), (3), (4), (5);
SELECT COUNT(*) FROM t4;
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SET GLOBAL rocksdb_compact_cf="cf_t1";
+--replace_column 2 # 3 #
+SHOW ENGINE rocksdb STATUS;
+
# Fetch data from information schema as well
--replace_column 3 #
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CFSTATS;
@@ -52,7 +57,7 @@ GROUP BY TABLE_NAME, PARTITION_NAME;
--replace_column 3 #
SELECT * FROM INFORMATION_SCHEMA.ROCKSDB_CF_OPTIONS;
-
+
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
@@ -81,3 +86,5 @@ SHOW ENGINE rocksdb TRANSACTION STATUS;
ROLLBACK;
+# Restore old values
+SET GLOBAL rocksdb_max_background_jobs= @save.rocksdb_max_background_jobs;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt
index 72b3af6bcf7..a3d2d07ec79 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt
+++ b/storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt
@@ -1 +1 @@
---rocksdb_default_cf_options=write_buffer_size=16k
+--rocksdb_default_cf_options=write_buffer_size=16k --rocksdb_strict_collation_check=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/singledelete.test b/storage/rocksdb/mysql-test/rocksdb/t/singledelete.test
index 718f6b7202e..5a9d17e0255 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/singledelete.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/singledelete.test
@@ -86,4 +86,20 @@ optimize table t5;
select case when variable_value-@s > 5 and variable_value-@s < 100 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_singledelete';
select case when variable_value-@d < 10 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_number_sst_entry_delete';
-DROP TABLE t1, t2, t3, t4, t5;
+# SingleDelete used for PK. Verify old PK is always deleted.
+CREATE TABLE t6 (
+ pk VARCHAR(64) COLLATE latin1_swedish_ci PRIMARY KEY
+) ENGINE=RocksDB;
+INSERT INTO t6 VALUES ('a');
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+SELECT * FROM t6;
+UPDATE t6 SET pk='A' WHERE pk='a';
+SELECT * FROM t6;
+DELETE FROM t6 where pk='A';
+--echo SELECT should return nothing;
+SELECT * FROM t6;
+SET GLOBAL rocksdb_force_flush_memtable_now=1;
+--echo SELECT should return nothing;
+SELECT * FROM t6;
+
+DROP TABLE t1, t2, t3, t4, t5, t6;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
new file mode 100644
index 00000000000..caffaa25ad9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test
@@ -0,0 +1,39 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+# Write file to make mysql-test-run.pl expect the "crash", but don't restart the
+# server until it is told to
+--let $_server_id= `SELECT @@server_id`
+--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect
+
+CREATE TABLE t1 (pk int primary key) ENGINE=ROCKSDB;
+
+# Create a .frm file without a matching table
+--exec cp $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1.frm $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test/t1#sql-test.frm
+
+# Restart the server with a .frm file exist but that table is not registered in RocksDB
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+
+# This will append '#sql-test' to the end of new name
+set session debug="+d,gen_sql_table_name";
+rename table t1 to t2;
+set session debug= "-d,gen_sql_table_name";
+
+# Remove the corresponding .frm files
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t1*.frm
+--remove_files_wildcard $MYSQLTEST_VARDIR/mysqld.$_server_id/data/test *t2*.frm
+
+# Restart the server with a table registered in RocksDB but does not have a .frm file
+--exec echo "wait" >$_expect_file_name
+shutdown_server 10;
+--exec echo "restart" >$_expect_file_name
+--sleep 5
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test
index dc444f7d308..af3cce19630 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test
@@ -1,3 +1,4 @@
+--source include/have_rocksdb.inc
--source include/master-slave.inc
--source include/have_binlog_format_row.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
new file mode 100644
index 00000000000..38bfb2eef8f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test
@@ -0,0 +1,545 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Basic TTL test
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# column before TTL in value
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# multi-part PK w/ TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# multi-part PK w/ TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# nullable column(s) before TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int,
+ c int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# variable len columns + null column(s) before TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL implicitly generated (no ttl column)
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field as the PK
+CREATE TABLE t1 (
+ a int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# should all still be there..
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+
+# TTL field inside multi-part pk
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a, ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field inside key with variable length things..
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL test where you compact (values still exist), real_sleep, then compact again,
+# values should now be gone.
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# Nothing should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1;
+
+# 1 and 2 should get removed here.
+set global rocksdb_compact_cf='default';
+--sorted_result
+SELECT a FROM t1;
+
+# 3 and 4 should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1;
+
+DROP TABLE t1;
+
+# TTL field with nullable ttl column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20),
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL field with non 8-bit integer column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts int,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL duration as some random garbage value
+--error ER_RDB_TTL_DURATION_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+
+# TTL col is some column outside of the table
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+
+# TTL col must have accompanying duration
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+
+# Make sure it doesn't filter out things early
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+
+INSERT INTO t1 values (1);
+SELECT COUNT(*) FROM t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Testing altering table comment with updated TTL duration
+# This should trigger a rebuild of the table
+CREATE TABLE t1 (
+ a INT PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1);
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Tables with hidden PK and SK disabled
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+--error ER_RDB_TTL_UNSUPPORTED
+ALTER TABLE t1 DROP PRIMARY KEY;
+
+DROP TABLE t1;
+
+# Test replacing PK, ttl should still work after
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1;
+
+DROP TABLE t1;
+
+# Make sure table comment filled with other text before/after will work
+# (basically, it needs semicolon before and after)
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int,
+ PRIMARY KEY (a,b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+SELECT COUNT(*) FROM t1;
+
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+# nothing removed here
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1;
+
+# all removed here
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+DROP TABLE t1;
+
+# Test to make sure that TTL retains original timestamp during update
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+UPDATE t1 SET a=a+1;
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,3,5 should be dropped
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# test behaviour on update with TTL column, TTL time can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=UNIX_TIMESTAMP() WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1;
+DROP TABLE t1;
+
+# Test rows expired stat variable and disable ttl variable
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+
+# Table with TTL won't increment rows expired when no records have been
+# compacted
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ PRIMARY KEY (a)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt
new file mode 100644
index 00000000000..aefc2f5da34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
new file mode 100644
index 00000000000..5a694b7b222
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test
@@ -0,0 +1,371 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# The purpose of read filtering for tables with TTL is to ensure that during a
+# transaction a key which has expired already but not removed by compaction
+# yet, is not returned to the user.
+#
+# Without this the user might be hit with problems such as disappearing rows
+# within a transaction, etc, because the compaction filter ignores snapshots
+# when filtering keys.
+
+# Basic read filtering test
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+--sorted_result
+SELECT * FROM t1;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test that some rows are hidden but others aren't...
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b BIGINT UNSIGNED NOT NULL
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# 1 should be hidden even though compaction hasn't run.
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_compact_cf='default';
+
+# none should be hidden yet, compaction runs but records aren't expired
+--sorted_result
+SELECT a FROM t1;
+
+# all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+--sorted_result
+SELECT a FROM t1;
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+
+DROP TABLE t1;
+
+# Test the filtering code explicitly.
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# should return nothing.
+--sorted_result
+SELECT * FROM t1;
+
+# disable filtering
+set global rocksdb_enable_ttl_read_filtering=0;
+
+# should return everything
+--sorted_result
+SELECT * FROM t1;
+
+# re-enable filtering
+set global rocksdb_enable_ttl_read_filtering=1;
+
+# should return nothing.
+--sorted_result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+# Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+ a int,
+ b int,
+ c int,
+ PRIMARY KEY (a,b,c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# HA_READ_KEY_EXACT, using full key
+SELECT * FROM t1 WHERE a=1 AND b=2 AND c=2;
+
+# HA_READ_KEY_EXACT, not using full key
+SELECT * FROM t1 WHERE a = 1;
+
+# HA_READ_BEFORE_KEY, not using full key
+SELECT max(a) from t1 where a < 3;
+
+#HA_READ_BEFORE_KEY, using full key
+SELECT max(a) from t1 where a < 2 AND b = 1 AND c < 3;
+
+# HA_READ_KEY_OR_NEXT
+SELECT min(a) from t1 where a >= 1;
+
+# HA_READ_AFTER_KEY, /* Find next rec. after key-record */
+SELECT min(a) from t1 where a > 1;
+
+# HA_READ_PREFIX_LAST, /* Last key with the same prefix */
+select * from t1 where a=1 and b in (1) order by c desc;
+
+# HA_READ_PREFIX_LAST_OR_PREV, /* Last or prev key with the same prefix */
+select max(a) from t1 where a <=10;
+
+# need to test read_range_first()
+# calls into read_range_next() and uses compare_keys() to see if its out of
+# range
+select a from t1 where a > 0 and a <= 2;
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+DROP TABLE t1;
+
+# duplicate PK value attempt to be inserted when old one is expired...
+# in this case, we pretend the expired key was not found and insert into PK
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+SELECT * FROM t1;
+
+# this should work, even if old value is not filtered out yet.
+INSERT INTO t1 values (1);
+
+# should show (1) result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+# Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1;
+
+# No error is thrown here, under the hood rnd_next_with_direction is
+# filtering out the record from being seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+
+##
+## More tests on update behaviour with expired keys.
+##
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+set global rocksdb_debug_ttl_rec_ts = -110;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (3,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (5,5);
+
+# expired key (1) is still around under the hood, but
+# this time rnd_next_with_direction finds non-expired key (5). So the
+# execution flow in the SQL layer moves onto update_write_row, where it then
+# finds the duplicate key (1). But the duplicate key is expired, so it allows
+# the overwrite.
+UPDATE t1 set a = 1;
+
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_enable_ttl_read_filtering=0;
+# 1,1 should be gone, even with read filtering disabled as it has been
+# overwritten
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_enable_ttl_read_filtering=1;
+
+# get_row_by_rowid tested here via index_read_map_impl
+UPDATE t1 set a = 999 where a = 1;
+--sorted_result
+SELECT * FROM t1;
+
+UPDATE t1 set a = a - 1;
+--sorted_result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+# Ensure no rows can disappear in the middle of long-running transactions
+# Also ensure repeatable-read works as expected
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 values (1);
+
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+
+# We need the below snippet in case establishing con1 took an arbitrary
+# amount of time. See https://github.com/facebook/mysql-5.6/pull/617#discussion_r120525391.
+--disable_query_log
+--let $snapshot_size= `SELECT COUNT(*) FROM t1`
+--let $i= 0
+while ($snapshot_size != 1)
+{
+ if ($i == 1000)
+ {
+ --die Your testing host is too slow for reasonable TTL testing
+ }
+
+ $i++;
+ ROLLBACK;
+ INSERT INTO t1 values (1);
+ BEGIN;
+ --let $snapshot_size= `SELECT COUNT(*) FROM t1`
+}
+--enable_query_log
+
+# Nothing filtered out here
+--sorted_result
+SELECT * FROM t1;
+
+--sleep 5
+
+--sorted_result
+SELECT * FROM t1; # <= shouldn't be filtered out here
+
+--echo # Switching to connection 2
+connection con2;
+# compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+# reads are filtered out on this separate connection: relative to its own
+# snapshot the records have already 'expired', so they are filtered out
+# even though they have not yet been removed by compaction
+--sorted_result
+SELECT * FROM t1;
+
+--echo # Switching to connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1; # <= shouldn't be filtered out here
+
+UPDATE t1 set a = a + 1;
+--sorted_result
+SELECT * FROM t1; # <= shouldn't be filtered out here
+
+COMMIT;
+
+--sorted_result # <= filtered out here because time has passed.
+SELECT * FROM t1;
+
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+
+#transaction 1, create a snapshot and select * => returns nothing.
+#transaction 2, insert into table, flush
+#transaction 1, select * => returns nothing, but the snapshot should prevent the compaction code from removing the rows, no matter what the ttl duration is.
+#transaction 2, select * -> sees nothing, disable filter, select * -> sees everything, enable filter, select * -> sees nothing.
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+--echo # On Connection 1
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+--sorted_result
+SELECT * FROM t1;
+# Sleep 5 secs after creating snapshot, this ensures any records created after
+# this can't be removed by compaction until this snapshot is released.
+--sleep 5
+
+--echo # On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # On Connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1;
+
+--echo # On Connection 2
+connection con2;
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_enable_ttl_read_filtering=0;
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_enable_ttl_read_filtering=1;
+
+disconnect con2;
+disconnect con1;
+connection default;
+
+DROP TABLE t1;
+# Wait till we reached the initial number of concurrent sessions
+--source include/wait_until_count_sessions.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test
new file mode 100644
index 00000000000..dd1a97b32df
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test
@@ -0,0 +1,253 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+#
+# Create a table with multiple partitions, but in the comment don't specify
+# that per-partition based column families (CF) should be created. Expect that
+# default CF will be used and new one won't be created.
+#
+# In addition, specify TTL on one of the partitions. Insert a few things
+# inside all the partitions, verify after compaction that the rows inside the
+# partition with TTL have disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ PRIMARY KEY (`c1`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1);
+INSERT INTO t1 values (2);
+INSERT INTO t1 values (3);
+INSERT INTO t1 values (4);
+INSERT INTO t1 values (5);
+INSERT INTO t1 values (6);
+INSERT INTO t1 values (7);
+INSERT INTO t1 values (8);
+INSERT INTO t1 values (9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+#
+# Create a table with multiple partitions and request for separate CF to be
+# created per every partition. As a result we expect three different CF-s to be
+# created.
+#
+# In addition, specify TTL on some of the partitions. Insert a few things
+# inside all the partitions, verify after compaction that the rows inside the
+# partition with TTL have disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+--sorted_result
+SELECT * FROM t1;
+
+# Now 2,5,8 should be removed (this verifies that TTL is only operating on the
+# particular CF).
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+#
+# Create a table with CF-s/TTL per partition and verify that ALTER TABLE + DROP
+# PRIMARY, ADD PRIMARY work for that scenario and data is persisted/filtered as
+# expected.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+--sorted_result
+SELECT * FROM t1;
+
+# TTL should be reset after alter table
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+# ...so nothing should be gone here
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+#
+# Create a table with non-partitioned TTL duration, with partitioned TTL
+# columns
+#
+# In this case the same TTL duration will be applied across different TTL
+# columns in different partitions, except for in p2 where we override the ttl
+# duration.
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;'
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+
+# here we expect only 1,2,3 to be gone, ttl implicit.
+--sorted_result
+SELECT c1 FROM t1;
+
+# here we expect only 4,5,6 to be gone, ttl based on column c2.
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT c1 FROM t1;
+
+# at this point only 7,8,9 should be left..
+DROP TABLE t1;
+
+#
+# Make sure non-partitioned TTL duration/col still works on table with
+# partitions.
+#
+# Simultaneously tests when TTL col is part of the key in partitioned table
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ PRIMARY KEY (`c1`, `c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=100;ttl_col=c2;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1),
+ PARTITION custom_p1 VALUES IN (2),
+ PARTITION custom_p2 VALUES IN (3)
+);
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# everything should still be here
+--sorted_result
+SELECT c1 FROM t1;
+
+set global rocksdb_debug_ttl_snapshot_ts = 300;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# everything should now be gone
+--sorted_result
+SELECT c1 FROM t1;
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
new file mode 100644
index 00000000000..b991f718a33
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt
@@ -0,0 +1,2 @@
+--rocksdb_enable_ttl_read_filtering=0
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
new file mode 100644
index 00000000000..fb439e109e7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test
@@ -0,0 +1,780 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Basic TTL test, pk ignored, no sk
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# no rows should be filtered
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# Basic TTL test
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# column before TTL in value
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# multi-part PK w/ TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, 4, 6, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# nullable column(s) before TTL
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int,
+ c int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kbc (b, c)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, NULL, NULL, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, NULL, NULL, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# variable len columns + null column(s) before TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`),
+KEY kbc (`b`, `c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'efghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# TTL implicitly generated (no ttl column)
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, 5);
+INSERT INTO t1 values (2, 4, 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# TTL field as the PK
+CREATE TABLE t1 (
+ a int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kt (ts)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=ts;';
+
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
+set global rocksdb_debug_ttl_snapshot_ts = -10;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+# should all still be there..
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 10;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX(kt);
+DROP TABLE t1;
+
+# TTL field inside multi-part pk
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ ts bigint(20) UNSIGNED NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a, ts),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 3, UNIX_TIMESTAMP(), 5);
+INSERT INTO t1 values (2, 4, UNIX_TIMESTAMP(), 6);
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL field inside key with variable length things..
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64),
+`c` varbinary(256),
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`a`, `ts`),
+KEY kb (`b`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', NULL, 'bc', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('de', 'fghijk', NULL, UNIX_TIMESTAMP(), 'l');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+
+# TTL test where you compact (values still exist), real_sleep, then compact again,
+# values should now be gone.
+CREATE TABLE t1 (
+a INT NOT NULL,
+b varbinary(64) NOT NULL,
+c varbinary(256) NOT NULL,
+ts bigint(20) UNSIGNED NOT NULL,
+value mediumblob NOT NULL,
+PRIMARY KEY (b,a,c),
+KEY kb (b)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=10;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values (2, 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (3, 'i', 'j', UNIX_TIMESTAMP(), 'k');
+INSERT INTO t1 values (4, 'm', 'n', UNIX_TIMESTAMP(), 'o');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+# Nothing should get removed here.
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 1 and 2 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+# 3 and 4 should get removed here.
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# TTL field with nullable ttl column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts bigint(20),
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL field with non 8-bit integer column (should fail)
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ ts int,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+# TTL duration as some random garbage value
+--error ER_RDB_TTL_DURATION_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=abc;';
+
+# TTL col is some column outside of the table
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=abc;';
+
+# TTL col must have accompanying duration
+--error ER_RDB_TTL_COL_FORMAT
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a,c),
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_col=abc;';
+
+# Make sure it doesn't filter out things early
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=500;';
+
+INSERT INTO t1 values (1, 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Testing altering table comment with updated TTL duration
+# This should trigger a rebuild of the table
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+ALTER TABLE t1 COMMENT = 'ttl_duration=1';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Tables with hidden PK disabled
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+--error ER_RDB_TTL_UNSUPPORTED
+ALTER TABLE t1 DROP PRIMARY KEY;
+
+DROP TABLE t1;
+
+# Test replacing PK, ttl should still work after
+CREATE TABLE t1 (
+ a INT PRIMARY KEY,
+ b INT,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (2,2);
+
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(b);
+set global rocksdb_debug_ttl_snapshot_ts = -3600;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+--sorted_result
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Make sure table comment filled with other text before/after will work
+# (basically, it needs semicolon before and after)
+CREATE TABLE t1 (
+ a bigint(20) UNSIGNED NOT NULL,
+ b int,
+ PRIMARY KEY (a,b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='asdadfasdfsadfadf ;ttl_duration=1; asfasdfasdfadfa';
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 1);
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+ALTER TABLE t1 COMMENT = 'adsf;;ttl_duration=5;asfasdfa;ttl_col=a;asdfasdf;';
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (UNIX_TIMESTAMP(), 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+
+# nothing removed here
+set global rocksdb_compact_cf='default';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+# all removed here
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+set global rocksdb_debug_ttl_ignore_pk=0;
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test to make sure that TTL retains original timestamp during update
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, 0);
+INSERT INTO t1 values (3, 0);
+INSERT INTO t1 values (5, 0);
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (7, 0);
+INSERT INTO t1 values (9, 0);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+UPDATE t1 SET a=a+1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 1,3,5 should be dropped
+--sorted_result
+SELECT * FROM t1;
+DROP TABLE t1;
+
+# test behaviour on update with TTL column, TTL time can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Test rows expired stat variable and disable ttl variable
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 1);
+INSERT INTO t1 values (3, 1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_enable_ttl=0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_enable_ttl=1;
+set global rocksdb_compact_cf='default';
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Table with TTL won't increment rows expired when no records have been
+# compacted
+CREATE TABLE t1 (
+ a bigint(20) NOT NULL,
+ b int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=100;';
+
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test update on TTL column in pk
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk=1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk=0;
+
+# 5 and 7 should be gone here
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# test behaviour on update with TTL column, TTL time can be updated here.
+CREATE TABLE t1 (
+ a INT,
+ b bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a, b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;ttl_col=b;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+INSERT INTO t1 values (5, UNIX_TIMESTAMP());
+INSERT INTO t1 values (7, UNIX_TIMESTAMP());
+
+set global rocksdb_debug_ttl_rec_ts = 300;
+UPDATE t1 SET b=(UNIX_TIMESTAMP()+1) WHERE a < 4;
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT a FROM t1;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 7 should be gone here
+--sorted_result
+SELECT a FROM t1;
+DROP TABLE t1;
+
+# Add index inplace
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, implicit TTL
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', 'd');
+INSERT INTO t1 values ('d', 'e', 'f', 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
+
+# Add index inplace, TTL column in PK
+CREATE TABLE t1 (
+`a` binary(8) NOT NULL,
+`b` varbinary(64) NOT NULL,
+`c` varbinary(256) NOT NULL,
+`ts` bigint(20) UNSIGNED NOT NULL,
+`value` mediumblob NOT NULL,
+PRIMARY KEY (`b`,`a`,`c`, `ts`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values ('a', 'b', 'c', UNIX_TIMESTAMP(), 'd');
+INSERT INTO t1 values ('d', 'e', 'f', UNIX_TIMESTAMP(), 'g');
+set global rocksdb_debug_ttl_rec_ts = 0;
+SELECT COUNT(*) FROM t1;
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# nothing filtered out
+SELECT COUNT(*) FROM t1;
+
+CREATE INDEX kb on t1 (b);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+# should have filtered the rows out since ttl is passed in compaction filter
+SELECT COUNT(*) FROM t1 FORCE INDEX (kb);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
new file mode 100644
index 00000000000..aefc2f5da34
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt
@@ -0,0 +1 @@
+--rocksdb_default_cf_options=disable_auto_compactions=true
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
new file mode 100644
index 00000000000..d6be7d95f8d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test
@@ -0,0 +1,500 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# The purpose of read filtering for tables with TTL is to ensure that during a
+# transaction a key which has expired already but not removed by compaction
+# yet, is not returned to the user.
+#
+# Without this the user might be hit with problems such as disappearing rows
+# within a transaction, etc, because the compaction filter ignores snapshots
+# when filtering keys.
+
+# Basic read filtering test
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+DROP TABLE t1;
+
+# Test that some rows are hidden but others aren't...
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b BIGINT UNSIGNED NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=10;';
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 values (1, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 300;
+INSERT INTO t1 values (2, UNIX_TIMESTAMP());
+INSERT INTO t1 values (3, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+
+--echo # 1 should be hidden
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+
+--echo # none should be hidden yet, compaction runs but records aren't expired
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+
+--echo # all should be hidden now, even though compaction hasn't run again
+set global rocksdb_debug_ttl_read_filter_ts = -310;
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT a FROM t1 FORCE INDEX (kb);
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+
+DROP TABLE t1;
+
+# Test the filtering code explicitly.
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# disable filtering
+set global rocksdb_enable_ttl_read_filtering=0;
+
+--echo # should return everything
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+# enable filtering
+set global rocksdb_enable_ttl_read_filtering=1;
+
+--echo # should return nothing.
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+--echo # Read filtering index scan tests (None of these queries should return any results)
+CREATE TABLE t1 (
+ a int,
+ b int,
+ c int,
+ PRIMARY KEY (a,b,c),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (0,0,0);
+INSERT INTO t1 values (0,0,1);
+INSERT INTO t1 values (0,1,0);
+INSERT INTO t1 values (0,1,1);
+INSERT INTO t1 values (1,1,2);
+INSERT INTO t1 values (1,2,1);
+INSERT INTO t1 values (1,2,2);
+INSERT INTO t1 values (1,2,3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+select variable_value into @c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+
+set global rocksdb_force_flush_memtable_now=1;
+
+# HA_READ_KEY_EXACT, using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 AND b=2 AND c=2;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 AND b=2 AND c=2;
+
+# HA_READ_KEY_EXACT, not using full key
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a = 1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a = 1;
+
+# HA_READ_BEFORE_KEY, not using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 3;
+
+# HA_READ_BEFORE_KEY, using full key
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a < 2 AND b = 1 AND c < 3;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a < 2 AND b = 1 AND c < 3;
+
+# HA_READ_KEY_OR_NEXT
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a >= 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a >= 1;
+
+# HA_READ_AFTER_KEY, /* Find next rec. after key-record */
+SELECT min(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a > 1;
+SELECT min(a) FROM t1 FORCE INDEX (kb) WHERE a > 1;
+
+# HA_READ_PREFIX_LAST, /* Last key with the same prefix */
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE a=1 and b in (1) order by c desc;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a=1 and b in (1) order by c desc;
+
+# HA_READ_PREFIX_LAST_OR_PREV, /* Last or prev key with the same prefix */
+SELECT max(a) FROM t1 FORCE INDEX (PRIMARY) WHERE a <=10;
+SELECT max(a) FROM t1 FORCE INDEX (kb) WHERE a <=10;
+
+# need to test read_range_first()
+# calls into read_range_next() and uses compare_keys() to see if its out of
+# range
+SELECT a FROM t1 FORCE INDEX (PRIMARY) WHERE a > 0 and a <= 2;
+SELECT a FROM t1 FORCE INDEX (kb) WHERE a > 0 and a <= 2;
+
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+select variable_value-@c from information_schema.global_status where variable_name='rocksdb_rows_expired';
+DROP TABLE t1;
+
+--echo # Attempt to update expired value, should filter out
+set global rocksdb_force_flush_memtable_now=1;
+CREATE TABLE t1 (
+ a int PRIMARY KEY
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1;
+
+--echo # No error is thrown here, under the hood index_next_with_direction is
+--echo # filtering out the record from being seen in the first place.
+UPDATE t1 set a = 1;
+DROP TABLE t1;
+
+--echo # Ensure no rows can disappear in the middle of long-running transactions
+--echo # Also ensure repeatable-read works as expected
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=5;';
+
+INSERT INTO t1 values (1, 1);
+
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+
+# We need the below snippet in case establishing con1 took an arbitrary
+# amount of time. See https://github.com/facebook/mysql-5.6/pull/617#discussion_r120525391.
+--disable_query_log
+--let $snapshot_size= `SELECT COUNT(*) FROM t1`
+--let $i= 0
+while ($snapshot_size != 1)
+{
+  if ($i == 1000)
+  {
+    --die Your testing host is too slow for reasonable TTL testing
+  }
+
+  # mysqltest has no '++' operator; '$i++;' would be variable-expanded and
+  # sent to the server as SQL ('0++;') and fail -- use the 'inc' command.
+  inc $i;
+  ROLLBACK;
+  INSERT INTO t1 values (1,1);
+  BEGIN;
+  --let $snapshot_size= `SELECT COUNT(*) FROM t1`
+}
+
+--echo # Nothing filtered out here
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--sleep 5
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+--echo # Switching to connection 2
+connection con2;
+--echo # compaction doesn't do anything since con1 snapshot is still open
+set global rocksdb_debug_ttl_ignore_pk = 1;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_debug_ttl_ignore_pk = 0;
+--echo # read filtered out, because on a different connection, on
+--echo # this connection the records have 'expired' already so they are filtered out
+--echo # even though they have not yet been removed by compaction
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # Switching to connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+UPDATE t1 set a = a + 1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb); # <= shouldn't be filtered out here
+
+COMMIT;
+
+--sorted_result # <= filtered out here because time has passed.
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+disconnect con1;
+disconnect con2;
+
+#transaction 1, create a snapshot and select * => returns nothing.
+#transaction 2, insert into table, flush
+#transaction 1, select * => returns nothing, but the snapshot should prevent the compaction code from removing the rows, no matter what the ttl duration is.
+#transaction 2, select * -> sees nothing, disable filter, select * -> sees everything, enable filter, select * -> sees nothing.
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+CREATE TABLE t1 (
+ a int PRIMARY KEY,
+ b int NOT NULL,
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;';
+
+--echo # On Connection 1
+connection con1;
+--echo # Creating Snapshot (start transaction)
+BEGIN;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+# Sleep 5 secs after creating snapshot, this ensures any records created after
+# this can't be removed by compaction until this snapshot is released.
+--sleep 5
+
+--echo # On Connection 2
+connection con2;
+set global rocksdb_debug_ttl_rec_ts = -2;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # On Connection 1
+connection con1;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+
+--echo # On Connection 2
+connection con2;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kb);
+set global rocksdb_enable_ttl_read_filtering=1;
+
+disconnect con2;
+disconnect con1;
+connection default;
+
+DROP TABLE t1;
+# Wait till we reached the initial number of concurrent sessions
+--source include/wait_until_count_sessions.inc
+
+# Test that index_next_with_direction skips records properly
+CREATE TABLE t1 (
+ a int,
+ b int,
+ ts bigint(20) UNSIGNED NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1;ttl_col=ts;';
+
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 VALUES (1, 1, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (2, 2, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (3, 3, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (4, 4, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (5, 5, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (6, 6, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (7, 7, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (8, 8, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (9, 9, UNIX_TIMESTAMP());
+INSERT INTO t1 VALUES (10, 10, UNIX_TIMESTAMP());
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+--echo # None are expired
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+UPDATE t1 SET ts=(UNIX_TIMESTAMP()+1) WHERE a IN (4, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+--echo # 4 and 7 should be gone
+SELECT a, b FROM t1 FORCE INDEX (kb);
+
+DROP TABLE t1;
+
+# Test range scans with various conditionals
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (c1, c2),
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1;';
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (2,2,'b');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (3,3,'c');
+INSERT INTO t1 values (4,4,'d');
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (5,5,'e');
+INSERT INTO t1 values (6,6,'f');
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (7,7,'g');
+INSERT INTO t1 values (8,8,'h');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE c1 > 5;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE c2 > 5;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY) WHERE 3 < c1 AND c1 < 6;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2) WHERE 3 < c2 AND c2 < 6;
+
+DROP TABLE t1;
+
+# Test range scans with varying expirations
+CREATE TABLE t1 (
+ a int,
+ b int,
+ PRIMARY KEY (a),
+ KEY kb (b)
+) ENGINE=rocksdb
+COMMENT='ttl_duration=1800;';
+
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (1,1);
+INSERT INTO t1 values (2,2);
+INSERT INTO t1 values (7,7);
+INSERT INTO t1 values (10,10);
+INSERT INTO t1 values (11,11);
+INSERT INTO t1 values (12,12);
+set global rocksdb_debug_ttl_rec_ts = 450;
+INSERT INTO t1 values (3,3);
+INSERT INTO t1 values (4,4);
+INSERT INTO t1 values (8,8);
+INSERT INTO t1 values (16,16);
+INSERT INTO t1 values (17,17);
+INSERT INTO t1 values (18,18);
+set global rocksdb_debug_ttl_rec_ts = 900;
+INSERT INTO t1 values (5,5);
+INSERT INTO t1 values (6,6);
+INSERT INTO t1 values (9,9);
+INSERT INTO t1 values (13,13);
+INSERT INTO t1 values (14,14);
+INSERT INTO t1 values (15,15);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--echo # Should see everything
+SELECT * FROM t1;
+
+--echo # Should have no records from the first group
+set global rocksdb_debug_ttl_read_filter_ts = -1800;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a > 5 AND a < 15;
+
+--echo # Should only have records from the last group
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 450;
+SELECT * FROM t1;
+SELECT * FROM t1 FORCE INDEX (kb) WHERE a < 10;
+
+--echo # Should be empty
+set global rocksdb_debug_ttl_read_filter_ts = -1800 - 900;
+SELECT * FROM t1;
+
+set global rocksdb_debug_ttl_read_filter_ts = 0;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
new file mode 100644
index 00000000000..4f9788ce33c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test
@@ -0,0 +1,87 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Multiple indexes, trigger compaction on sk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='kb';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
+
+# Trigger compaction on pk and sk and check consistency
+CREATE TABLE t1 (
+ a int NOT NULL,
+ b int NOT NULL,
+ c int NOT NULL,
+ PRIMARY KEY (a),
+ KEY kb (b) COMMENT 'kb',
+ KEY kc (c) COMMENT 'kc'
+) ENGINE=ROCKSDB
+COMMENT='ttl_duration=1';
+
+set global rocksdb_debug_ttl_rec_ts = -100;
+INSERT INTO t1 values (1, 1, 1);
+INSERT INTO t1 values (2, 2, 2);
+set global rocksdb_debug_ttl_rec_ts = 100;
+INSERT INTO t1 values (3, 3, 3);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+set global rocksdb_compact_cf='kb';
+
+# Results should be consistent
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+SELECT * FROM t1 FORCE INDEX (kb);
+SELECT * FROM t1 FORCE INDEX (kc);
+
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
new file mode 100644
index 00000000000..10a88c30361
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt
@@ -0,0 +1 @@
+--rocksdb_enable_ttl_read_filtering=0
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
new file mode 100644
index 00000000000..c10c83f9f9d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test
@@ -0,0 +1,300 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+--source include/have_partition.inc
+
+#
+# Create a table with multiple partitions, but in the comment don't specify
+# that per-partition based column families (CF) should be created. Expect that
+# default CF will be used and new one won't be created.
+#
+# In addition, specify TTL on one of the partitions. Insert a few things
+# inside all the partitions, verify after compaction that the rows inside the
+# partition with TTL has disappeared.
+#
+# Add secondary keys to all tables to test compatibility.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`),
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (7, 7);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
+
+#
+# Create a table with multiple partitions and request for separate CF to be
+# created per every partition. As a result we expect three different CF-s to be
+# created.
+#
+# In addition, specify TTL on some of the partitions. Insert a few things
+# inside all the partitions, verify after compaction that the rows inside the
+# partition with TTL has disappeared.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=my_custom_cf;custom_p2_cfname=baz',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;custom_p1_ttl_duration=7;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -1200;
+INSERT INTO t1 values (1,1,'a');
+INSERT INTO t1 values (4,4,'aaaa');
+INSERT INTO t1 values (7,7,'aaaaaaa');
+
+set global rocksdb_debug_ttl_rec_ts = 1200;
+INSERT INTO t1 values (2,2,'aa');
+INSERT INTO t1 values (3,3,'aaa');
+INSERT INTO t1 values (5,5,'aaaaa');
+INSERT INTO t1 values (6,6,'aaaaaa');
+INSERT INTO t1 values (8,8,'aaaaaaaa');
+INSERT INTO t1 values (9,9,'aaaaaaaaa');
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_debug_ttl_snapshot_ts = 3600;
+set @@global.rocksdb_compact_cf = 'foo';
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# Now 2,5,8 should be removed (this verifies that TTL is only operating on the
+# particular CF.
+set @@global.rocksdb_compact_cf = 'my_custom_cf';
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with CF-s/TTL per partition and verify that ALTER TABLE + DROP
+# PRIMARY, ADD PRIMARY work for that scenario and data is persisted/filtered as
+# expected.
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`, `c2`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (c2)
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=9999;custom_p2_ttl_duration=5;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+INSERT INTO t1 VALUES (1, 1, "one", null);
+INSERT INTO t1 VALUES (2, 2, "two", null);
+INSERT INTO t1 VALUES (3, 3, "three", null);
+
+INSERT INTO t1 VALUES (4, 4, "four", null);
+INSERT INTO t1 VALUES (5, 5, "five", null);
+INSERT INTO t1 VALUES (6, 6, "six", null);
+
+INSERT INTO t1 VALUES (7, 7, "seven", null);
+INSERT INTO t1 VALUES (8, 8, "eight", null);
+INSERT INTO t1 VALUES (9, 9, "nine", null);
+
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+# TTL should be reset after alter table
+set global rocksdb_debug_ttl_rec_ts = 600;
+ALTER TABLE t1 DROP PRIMARY KEY, ADD PRIMARY KEY(`c2`,`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;';
+set global rocksdb_debug_ttl_rec_ts = 0;
+SHOW CREATE TABLE t1;
+
+# ...so nothing should be gone here
+set global rocksdb_debug_ttl_snapshot_ts = 100;
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+set global rocksdb_debug_ttl_snapshot_ts = 1200;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+
+DROP TABLE t1;
+
+#
+# Create a table with non-partitioned TTL duration, with partitioned TTL
+# columns
+#
+# In this case the same TTL duration will be applied across different TTL
+# columns in different partitions, except for in p2 where we override the ttl
+# duration.
+#
+CREATE TABLE t1 (
+ c1 BIGINT,
+ c2 BIGINT UNSIGNED NOT NULL,
+ name VARCHAR(25) NOT NULL,
+ event DATE,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;custom_p1_cfname=bar;custom_p2_cfname=baz;',
+ KEY kc2 (`c2`)
+) ENGINE=ROCKSDB
+COMMENT="ttl_duration=1;custom_p1_ttl_duration=100;custom_p1_ttl_col=c2;custom_p2_ttl_duration=5000;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 2, 3),
+ PARTITION custom_p1 VALUES IN (4, 5, 6),
+ PARTITION custom_p2 VALUES IN (7, 8, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -300;
+INSERT INTO t1 VALUES (1, UNIX_TIMESTAMP(), "one", null);
+INSERT INTO t1 VALUES (2, UNIX_TIMESTAMP(), "two", null);
+INSERT INTO t1 VALUES (3, UNIX_TIMESTAMP(), "three", null);
+set global rocksdb_debug_ttl_rec_ts = 0;
+
+INSERT INTO t1 VALUES (4, UNIX_TIMESTAMP(), "four", null);
+INSERT INTO t1 VALUES (5, UNIX_TIMESTAMP(), "five", null);
+INSERT INTO t1 VALUES (6, UNIX_TIMESTAMP(), "six", null);
+
+INSERT INTO t1 VALUES (7, UNIX_TIMESTAMP(), "seven", null);
+INSERT INTO t1 VALUES (8, UNIX_TIMESTAMP(), "eight", null);
+INSERT INTO t1 VALUES (9, UNIX_TIMESTAMP(), "nine", null);
+
+set global rocksdb_force_flush_memtable_now=1;
+set @@global.rocksdb_compact_cf = 'foo';
+set @@global.rocksdb_compact_cf = 'baz';
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+
+# here we expect only 1,2,3 to be gone, ttl implicit.
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# here we expect only 4,5,6 to be gone, ttl based on column c2.
+set global rocksdb_debug_ttl_snapshot_ts = 600;
+set @@global.rocksdb_compact_cf = 'bar';
+# Filter out expired secondary keys too
+set @@global.rocksdb_compact_cf = 'default';
+set global rocksdb_debug_ttl_snapshot_ts = 0;
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT c1 FROM t1 FORCE INDEX (kc2);
+
+# at this point only 7,8,9 should be left..
+DROP TABLE t1;
+
+#
+# Add index inplace
+#
+CREATE TABLE t1 (
+ c1 INT,
+ c2 INT,
+ PRIMARY KEY (`c1`) COMMENT 'custom_p0_cfname=foo;'
+) ENGINE=ROCKSDB
+COMMENT="custom_p0_ttl_duration=1;"
+PARTITION BY LIST(c1) (
+ PARTITION custom_p0 VALUES IN (1, 4, 7),
+ PARTITION custom_p1 VALUES IN (2, 5, 8),
+ PARTITION custom_p2 VALUES IN (3, 6, 9)
+);
+
+set global rocksdb_debug_ttl_rec_ts = -3600;
+INSERT INTO t1 values (1, 1);
+INSERT INTO t1 values (4, 4);
+INSERT INTO t1 values (7, 7);
+set global rocksdb_debug_ttl_rec_ts = 0;
+INSERT INTO t1 values (2, 2);
+INSERT INTO t1 values (3, 3);
+INSERT INTO t1 values (5, 5);
+INSERT INTO t1 values (6, 6);
+INSERT INTO t1 values (8, 8);
+INSERT INTO t1 values (9, 9);
+
+# Nothing filtered out
+--sorted_result
+SELECT * FROM t1;
+
+CREATE INDEX kc2 on t1 (c2);
+
+set global rocksdb_force_flush_memtable_now=1;
+set global rocksdb_compact_cf='foo';
+set global rocksdb_compact_cf='default';
+
+# 1,4, and 7 should be gone
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (PRIMARY);
+--sorted_result
+SELECT * FROM t1 FORCE INDEX (kc2);
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test b/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test
index 79232255d25..983c70256b0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test
@@ -70,27 +70,25 @@ SELECT a FROM t1 WHERE FIND_IN_SET('Europe',a) > 0;
DROP TABLE t1;
---disable_parsing
-#--error ER_GET_ERRMSG
CREATE TABLE t1 (
a SET('N.America','S.America','Africa','Antarctica','Australia','Europe','Asia'),
b SET('test1','test2','test3','test4','test5'),
c SET('01','22','23','33','34','39','40','44','50','63','64'),
pk SET('1','2','3','4','5','6','7','8','9') PRIMARY KEY,
UNIQUE INDEX b_a (b,a)
-) ENGINE=innodb;
+) ENGINE=rocksdb;
SHOW INDEX IN t1;
-INSERT INTO t1 (a,b,c) VALUES
-('','test2,test3','01,34,44,23'),
-('',5,2),
-('N.America,Asia','test4,test2',''),
-('Africa,Europe,Asia','test2,test3','01'),
-('Antarctica','test3','34,44'),
-('Asia','test5','50'),
-('Europe,S.America','test1,','39');
+INSERT INTO t1 (a,b,c,pk) VALUES
+('','test2,test3','01,34,44,23',1),
+('',5,2,2),
+('N.America,Asia','test4,test2','',3),
+('Africa,Europe,Asia','test2,test3','01',4),
+('Antarctica','test3','34,44',5),
+('Asia','test5','50',6),
+('Europe,S.America','test1,','39',7);
--replace_column 9 #
EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
@@ -99,6 +97,4 @@ SELECT DISTINCT a, b FROM t1 ORDER BY b DESC, a;
DROP TABLE t1;
---enable_parsing
-
SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK = @ORIG_PAUSE_BACKGROUND_WORK;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt
new file mode 100644
index 00000000000..33e72265db2
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt
@@ -0,0 +1 @@
+--rocksdb_debug_optimizer_n_rows=10
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc
index f813b8871b1..bcca0c3a499 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc
+++ b/storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc
@@ -43,6 +43,7 @@ insert into t1 values (1, 'a ', 'a-space');
insert into t1 values (2, 'a', 'a');
insert into t1 values (3, 'a \t', 'a-tab');
+analyze table t1;
--echo # Must show 'using index' for latin1_bin and utf8_bin:
--replace_column 9 #
explain
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test b/storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test
index 0c7df020ef5..a15e5d2a7be 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test
@@ -8,17 +8,17 @@ insert into t1 values (1,1);
SET session transaction isolation level read uncommitted;
begin;
---error ER_UNKNOWN_ERROR
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
insert into t1 values (2,1);
---error ER_UNKNOWN_ERROR
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
select * from t1 where id=1;
rollback;
SET session transaction isolation level serializable;
begin;
---error ER_UNKNOWN_ERROR
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
insert into t1 values (2,1);
---error ER_UNKNOWN_ERROR
+--error ER_ISOLATION_MODE_NOT_SUPPORTED
select * from t1 where id=1;
rollback;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/update.test b/storage/rocksdb/mysql-test/rocksdb/t/update.test
index e0a49ee8ca5..51e2c5be0a6 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/update.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/update.test
@@ -61,10 +61,10 @@ BEGIN;
UPDATE t1 SET b = 'update2' WHERE a <= 100;
SAVEPOINT spt1;
UPDATE t1 SET b = '';
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_TO_SAVEPOINT
ROLLBACK TO SAVEPOINT spt1;
UPDATE t1 SET b = 'upd' WHERE a = 10050;
---error ER_UNKNOWN_ERROR
+--error ER_ROLLBACK_ONLY
COMMIT;
SELECT * FROM t1 ORDER BY pk;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
index 87d31d2e2d9..071dbece7b0 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test
@@ -36,7 +36,7 @@ source include/search_pattern_in_file.inc;
--exec echo "wait" >$_expect_file_name
shutdown_server 10;
---exec echo "restart:--rocksdb_use_direct_writes=1 --rocksdb_allow_mmap_writes=1" >$_expect_file_name
+--exec echo "restart:--rocksdb_use_direct_io_for_flush_and_compaction=1 --rocksdb_allow_mmap_writes=1" >$_expect_file_name
--sleep 0.1
--exec echo "restart:" >$_expect_file_name
@@ -44,5 +44,5 @@ shutdown_server 10;
--source include/wait_until_connected_again.inc
--disable_reconnect
-let SEARCH_PATTERN=enable both use_direct_writes;
+let SEARCH_PATTERN=enable both use_direct_io_for_flush_and_compaction;
source include/search_pattern_in_file.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test b/storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test
new file mode 100644
index 00000000000..d00a8b7afbe
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test
@@ -0,0 +1,131 @@
+--source include/have_debug.inc
+--source include/have_rocksdb.inc
+
+# Create a table with a varbinary key with the current format and validate
+# that it sorts correctly
+CREATE TABLE t1(
+ vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+
+SELECT hex(vb) FROM t1;
+
+# Use the fact that the rocksdb_locks shows the keys as they are encoded to
+# validate that the keys were encoded as expected
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
+
+DROP TABLE t1;
+
+# Now create the same table in the old format to show that they can be read
+# and handled correctly
+set session debug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+ vb VARBINARY(64) primary key
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+
+INSERT INTO t1 values(0x00);
+INSERT INTO t1 values(0x0000);
+INSERT INTO t1 values(0x0000000000000000);
+INSERT INTO t1 values(0x000000);
+INSERT INTO t1 values(0x000000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x0000000000000000000000000000000000000000000000000000000000000000);
+INSERT INTO t1 values(0x00000000);
+INSERT INTO t1 values(0x0000000000);
+INSERT INTO t1 values(0x00000000000000000000);
+INSERT INTO t1 values(0x000000000000);
+INSERT INTO t1 values(0x00000000000000);
+INSERT INTO t1 values(0x000000000000000000);
+
+SELECT hex(vb) FROM t1;
+
+# Use the fact that the rocksdb_locks shows the keys as they are encoded to
+# validate that the keys were encoded as expected
+BEGIN;
+SELECT hex(vb) FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
+
+DROP TABLE t1;
+
+# Now create a table with a varchar key using a binary collation with the
+# current format and validate that it sorts correctly
+CREATE TABLE t1(
+ vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+
+SELECT * FROM t1;
+
+# Use the fact that the rocksdb_locks shows the keys as they are encoded to
+# validate that the keys were encoded as expected
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
+
+DROP TABLE t1;
+
+# Now create the same table in the old format to show that they can be read
+# and handled correctly
+set session debug= '+d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+CREATE TABLE t1(
+ vc VARCHAR(64) collate 'binary' primary key
+) ENGINE=rocksdb;
+set session debug= '-d,MYROCKS_LEGACY_VARBINARY_FORMAT';
+
+INSERT INTO t1 values('a');
+INSERT INTO t1 values('aa');
+INSERT INTO t1 values('aaaaaaaa');
+INSERT INTO t1 values('aaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
+INSERT INTO t1 values('aaaa');
+INSERT INTO t1 values('aaaaa');
+INSERT INTO t1 values('aaaaaaaaaa');
+INSERT INTO t1 values('aaaaaa');
+INSERT INTO t1 values('aaaaaaa');
+INSERT INTO t1 values('aaaaaaaaa');
+
+SELECT * FROM t1;
+
+# Use the fact that the rocksdb_locks shows the keys as they are encoded to
+# validate that the keys were encoded as expected
+BEGIN;
+SELECT * FROM t1 FOR UPDATE;
+SELECT SUBSTRING(a.key,9) FROM information_schema.rocksdb_locks AS a ORDER BY a.key;
+ROLLBACK;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
index 804889dcec9..ffa795bef88 100644
--- a/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
+++ b/storage/rocksdb/mysql-test/rocksdb/t/write_sync.test
@@ -6,8 +6,9 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit= @@global.rocksdb_flush_log_at_trx_commit;
-SET LOCAL rocksdb_flush_log_at_trx_commit=0;
-sleep 30;
+set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
+--exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
@@ -16,28 +17,28 @@ select variable_value-@a from information_schema.global_status where variable_na
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-SET LOCAL rocksdb_flush_log_at_trx_commit=1;
+SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
+--exec sleep 5
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(4,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-insert aaa(id, i) values(5,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-insert aaa(id, i) values(6,1);
-select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
-SET GLOBAL rocksdb_background_sync=on;
-SET LOCAL rocksdb_flush_log_at_trx_commit=0;
-insert aaa(id, i) values(7,1);
+let $status_var=rocksdb_wal_synced;
+let $status_var_value=`select @a+1`;
+source include/wait_for_status_var.inc;
+
+SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
+--exec sleep 5
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
+insert aaa(id, i) values(5,1);
let $status_var=rocksdb_wal_synced;
-let $status_var_value=`select @a+4`;
+let $status_var_value=`select @a+1`;
source include/wait_for_status_var.inc;
truncate table aaa;
# Cleanup
drop table aaa;
-SET GLOBAL rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
+set @@global.rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_write_disable_wal=false;
SET GLOBAL rocksdb_write_ignore_missing_column_families=false;
-SET GLOBAL rocksdb_background_sync=off;
-
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh
new file mode 100755
index 00000000000..a4d60dc864c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh
@@ -0,0 +1,16 @@
+set -e
+
+COPY_LOG=$1
+SIGNAL_FILE=$2
+# Creating a table after myrocks_hotbackup reaches waiting loop
+
+done=0
+while : ; do
+ wait=`tail -1 $COPY_LOG | grep 'Waiting until' | wc -l`
+ if [ "$wait" -eq "1" ]; then
+ break
+ fi
+ sleep 1
+done
+$MYSQL --defaults-group-suffix=.1 db1 -e "create table r10 (id int primary key ) engine=rocksdb"
+touch $SIGNAL_FILE
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
index ef505e4b888..ecf8a851267 100755
--- a/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
if [ "$STREAM_TYPE" == 'wdt' ]; then
which wdt >/dev/null 2>&1
if [ $? -ne 0 ]; then
@@ -7,6 +9,7 @@ if [ "$STREAM_TYPE" == 'wdt' ]; then
fi
set -e
+set -o pipefail
# Takes a full backup from server_1 to server_2
# using myrocks_hotbackup streaming
@@ -29,25 +32,37 @@ rm -rf $dest_data_dir/
mkdir $dest_data_dir
COPY_LOG="${MYSQL_TMP_DIR}/myrocks_hotbackup_copy_log"
+SIGNAL_CONDITION=""
+SIGNAL_FILE=${MYSQL_TMP_DIR}/myrocks_hotbackup_signal
+rm -f $COPY_LOG
+rm -f $SIGNAL_FILE
+
+if [ "$FRM" == '1' ]; then
+ suite/rocksdb_hotbackup/include/create_table.sh $COPY_LOG $SIGNAL_FILE 2>&1 &
+fi
+
+if [ "$DEBUG_SIGNAL" == '1' ]; then
+ SIGNAL_CONDITION="--debug_signal_file=$SIGNAL_FILE"
+fi
if [ "$STREAM_TYPE" == 'tar' ]; then
BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \
- --stream=tar --checkpoint_dir=$checkpoint_dir 2> \
+ --stream=tar --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
$COPY_LOG | tar -xi -C $backup_dir"
elif [ "$STREAM_TYPE" == 'xbstream' ]; then
BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --port=${MASTER_MYPORT} \
- --stream=xbstream --checkpoint_dir=$checkpoint_dir 2> \
+ --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
$COPY_LOG | xbstream -x \
--directory=$backup_dir"
elif [ "$STREAM_TYPE" == "xbstream_socket" ]; then
BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --socket=${MASTER_MYSOCK} \
- --stream=xbstream --checkpoint_dir=$checkpoint_dir 2> \
+ --stream=xbstream --checkpoint_dir=$checkpoint_dir $SIGNAL_CONDITION 2> \
$COPY_LOG | xbstream -x \
--directory=$backup_dir"
else
BACKUP_CMD="$MYSQL_MYROCKS_HOTBACKUP --user='root' --stream=wdt \
--port=${MASTER_MYPORT} --destination=localhost --backup_dir=$backup_dir \
- --avg_mbytes_per_sec=10 --interval=5 \
+ --avg_mbytes_per_sec=10 --interval=5 $SIGNAL_CONDITION \
--extra_wdt_sender_options='--block_size_mbytes=1' \
--checkpoint_dir=$checkpoint_dir 2> \
$COPY_LOG"
@@ -55,10 +70,6 @@ fi
echo "myrocks_hotbackup copy phase"
eval "$BACKUP_CMD"
-if [ $? -ne 0 ]; then
- tail $COPY_LOG
- exit 1
-fi
mkdir ${backup_dir}/test # TODO: Fix skipping empty directories
@@ -70,7 +81,3 @@ $MYSQL_MYROCKS_HOTBACKUP --move_back --datadir=$dest_data_dir \
--rocksdb_waldir=$dest_data_dir/.rocksdb \
--backup_dir=$backup_dir > $MOVEBACK_LOG 2>&1
-if [ $? -ne 0 ]; then
- tail $MOVEBACK_LOG
- exit 1
-fi
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result
index d3f2ebc4e6f..31ed2677444 100644
--- a/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result
@@ -10,6 +10,7 @@ key (`k`)
) engine=rocksdb;
include/rpl_stop_server.inc [server_number=2]
myrocks_hotbackup copy phase
+myrocks_hotbackup copy phase
myrocks_hotbackup move-back phase
include/rpl_start_server.inc [server_number=2]
select count(*) from db1.t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
index 9bfab4252c4..52456a68140 100644
--- a/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
+++ b/storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test
@@ -5,6 +5,9 @@ source suite/rocksdb_hotbackup/include/setup.inc;
--let $rpl_server_number= 2
--source include/rpl_stop_server.inc
+--error 1
+--exec STREAM_TYPE=xbstream FRM=1 DEBUG_SIGNAL=1 suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
+
--exec STREAM_TYPE=xbstream suite/rocksdb_hotbackup/include/stream_run.sh 2>&1
--let $rpl_server_number= 2
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
index 7a7400f17e1..493107ec071 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result
@@ -1,4 +1,5 @@
DROP TABLE IF EXISTS t1;
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
'con1'
@@ -7,12 +8,10 @@ SET DEBUG_SYNC='rocksdb.prepared SIGNAL parked WAIT_FOR go';
insert into t1 values (1, 1, "iamtheogthealphaandomega");;
'con2'
insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
-SET GLOBAL ROCKSDB_ENABLE_2PC = OFF;
-SET GLOBAL ROCKSDB_WRITE_SYNC = OFF;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
SET GLOBAL SYNC_BINLOG = 0;
SET DEBUG_SYNC='now WAIT_FOR parked';
-SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
-SET GLOBAL ROCKSDB_WRITE_SYNC = ON;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
SET DEBUG_SYNC='now SIGNAL go';
@@ -23,5 +22,5 @@ a b c
1 1 iamtheogthealphaandomega
select count(*) from t1;
count(*)
-1000000
+4096
drop table t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
new file mode 100644
index 00000000000..8a1fd1b94e0
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result
@@ -0,0 +1,39 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+include/rpl_connect.inc [creating slave_block]
+drop table if exists t1;
+create table t1 (i int);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+insert into t1 values (1), (2), (3);
+connection slave
+select * from t1;
+i
+1
+2
+3
+connection slave_block
+lock tables t1 read;
+connection master;
+create high_priority index idx1 on t1 (i);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+connection slave;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `i` int(11) DEFAULT NULL,
+ KEY `idx1` (`i`)
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+drop table t1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result
index e0dbc92cdf5..5559bf6168c 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result
@@ -10,7 +10,7 @@ create table t1(a int);
set session binlog_format=STATEMENT;
insert into t1 values(1);
include/wait_for_slave_sql_error.inc [errno=1756]
-Last_SQL_Error = 'Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave. rpl_skip_tx_api recovery should only be used when master's binlog format is ROW.'
+Last_SQL_Error = 'Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave, this should only be used when master's binlog format is ROW.'
"Table after error"
select * from t1;
a
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
new file mode 100644
index 00000000000..3d734c9498d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result
@@ -0,0 +1,24 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+call mtr.add_suppression("Recovery from master pos");
+drop table if exists r1;
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+set global rocksdb_force_flush_memtable_now=1;
+include/rpl_start_server.inc [server_number=2]
+include/start_slave.inc
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+id1 id2
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+id2
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+id1 id2
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+id2
+drop table r1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result
new file mode 100644
index 00000000000..609d4a8821a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result
@@ -0,0 +1,25 @@
+include/master-slave.inc
+Warnings:
+Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
+[connection master]
+drop table if exists r1;
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+set sql_log_bin=0;
+delete from r1 where id1=1 and id2=1000;
+set sql_log_bin=1;
+set global rocksdb_force_flush_memtable_now=1;
+insert into r1 values (1, 1000);
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary);
+id1 id2
+select id2 from r1 force index (i);
+id2
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary);
+id1 id2
+select id2 from r1 force index (i);
+id2
+drop table r1;
+include/rpl_end.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-mater.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt
index c747adc94d5..c747adc94d5 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-mater.opt
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
index f47f83b0bd2..6143824eea6 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test
@@ -3,6 +3,11 @@
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/big_test.inc
+# The test involves a crash which does not seem to be handled well with
+# mysql-test/lib/My/SafeProcess/my_safe_process under valgrind as it hangs
+# forever. The test did not mean to verify the memory leaks so not much
+# coverage should be missed by not running it under valgrind.
+--source include/not_valgrind.inc
--exec echo > $MYSQLTEST_VARDIR/log/mysqld.1.err
@@ -10,16 +15,18 @@
DROP TABLE IF EXISTS t1;
--enable_warnings
+# Set it to the minimum so that we can make the binlog rotate with a few inserts
+SET GLOBAL MAX_BINLOG_SIZE = 4096;
SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
create table t1 (a int primary key, b int, c varchar(255)) engine=rocksdb;
connect (con1, localhost, root,,);
connect (con2, localhost, root,,);
-# On connection one we insert a row and pause after commit marker is written to WAL.
-# Connection two then inserts many rows. After connection two
-# completes connection one continues only to crash before commit but after
-# binlog write. On crash recovery we see that connection one's value
+# On connection one we insert a row and pause after prepare marker is written to
+# WAL. Connection two then inserts many rows to rotate the binlog. After
+# connection two completes, connection one continues only to crash before commit
+# but after binlog write. On crash recovery we see that connection one's value
# has been recovered and commited
connection con1;
--echo 'con1'
@@ -35,14 +42,14 @@ insert into t1 values (2, 1, "i_am_just_here_to_trigger_a_flush");
# Disable 2PC and syncing for faster inserting of dummy rows
# These rows only purpose is to rotate the binlog
-SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
-SET GLOBAL ROCKSDB_WRITE_SYNC = OFF;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
SET GLOBAL SYNC_BINLOG = 0;
SET DEBUG_SYNC='now WAIT_FOR parked';
--disable_query_log
--let $pk= 3
-while ($pk < 1000000) {
+# binlog size is 4096 bytes so with that many insertion it will definitely rotate
+while ($pk < 4096) {
eval insert into t1 values ($pk, 1, "foobardatagoesheresothatmorelogsrollwhichiswhatwewant");
--inc $pk
}
@@ -50,18 +57,16 @@ while ($pk < 1000000) {
# re-enable 2PC an syncing then write to trigger a flush
# before we trigger the crash to simulate full-durability
-SET GLOBAL ROCKSDB_ENABLE_2PC = ON;
-SET GLOBAL ROCKSDB_WRITE_SYNC = ON;
+SET GLOBAL ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
SET GLOBAL SYNC_BINLOG = 1;
insert into t1 values (1000000, 1, "i_am_just_here_to_trigger_a_flush");
SET DEBUG_SYNC='now SIGNAL go';
-
+--source include/wait_until_disconnected.inc
--enable_reconnect
--source include/wait_until_connected_again.inc
-
---exec sleep 60
+--disable_reconnect
--exec python suite/rocksdb/t/check_log_for_xa.py $MYSQLTEST_VARDIR/log/mysqld.1.err commit,prepare,rollback
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
new file mode 100644
index 00000000000..7cf4a4d32b5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test
@@ -0,0 +1,2 @@
+--source include/have_rocksdb.inc
+--source include/rpl_ddl_high_priority.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
index 949fbad666d..f1b1b16704f 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test
@@ -1,3 +1,4 @@
+-- source include/have_rocksdb.inc
-- source include/have_gtid.inc
-- source include/master-slave.inc
-- source include/have_debug.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc
index 43ee7ec526c..a52bfc9186d 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc
@@ -1,3 +1,4 @@
+source include/have_rocksdb.inc;
source include/master-slave.inc;
-- let $uuid = `select @@server_uuid;`
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test
index 56c0eac2517..d1793c4af1e 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test
@@ -1,4 +1,5 @@
# based on rpl/rpl_gtid_innondb_sys_header.test
+source include/have_rocksdb.inc;
source include/master-slave.inc;
source include/have_gtid.inc;
source include/have_debug.inc;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test
index 8c79d2afa03..cecacda44e8 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test
@@ -2,5 +2,6 @@
--echo # Ensure skip_unique_check is set when lag exceeds lag_threshold
--echo #
+--source include/have_rocksdb.inc
--source ../include/rpl_no_unique_check_on_lag.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test
index c5cf1a8ae92..7e77ec87c3b 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test
@@ -1,2 +1,3 @@
+--source include/have_rocksdb.inc
--source ../include/rpl_no_unique_check_on_lag.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test
index 37f80c8ace5..200f1cb314e 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test
@@ -1,3 +1,4 @@
+--source include/have_rocksdb.inc
--source include/master-slave.inc
--source include/have_binlog_format_row.inc
@@ -38,9 +39,9 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=rocksdb;
INSERT INTO t1 VALUES(1);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
---error ER_UNKNOWN_ERROR
+--error ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT SNAPSHOT;
---error ER_UNKNOWN_ERROR
+--error ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT
START TRANSACTION WITH CONSISTENT ROCKSDB SNAPSHOT;
ROLLBACK;
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test
index 2b590f84653..79d71f20e8a 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test
@@ -1,3 +1,4 @@
+--source include/have_rocksdb.inc
--source include/master-slave.inc
--source include/have_binlog_format_row.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt
index 67f0fcf77f0..b3d52445ad8 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt
@@ -1,2 +1,3 @@
--gtid_mode=ON --enforce_gtid_consistency --log_slave_updates --max_binlog_size=50000
--slave_parallel_workers=30 --relay_log_recovery=1 --rocksdb_unsafe_for_binlog=TRUE
+--rocksdb_wal_recovery_mode=2
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test
index 22151d14547..1ea9add8019 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test
@@ -1,5 +1,6 @@
# Checks if the slave stops executing transactions when master's binlog format
# is STATEMENT but rpl_skip_tx_api is enabled
+-- source include/have_rocksdb.inc
-- source include/master-slave.inc
call mtr.add_suppression("Master's binlog format is not ROW but rpl_skip_tx_api is enabled on the slave");
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf
new file mode 100644
index 00000000000..71e124adc81
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf
@@ -0,0 +1,15 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+
+[mysqld.2]
+relay_log_recovery=1
+relay_log_info_repository=FILE
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+slave_use_idempotent_for_recovery=Yes
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
new file mode 100644
index 00000000000..9180afa881f
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test
@@ -0,0 +1,72 @@
+
+--source include/have_binlog_format_row.inc
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_gtid.inc
+--source include/not_valgrind.inc
+
+# This is a test case for issue#655 -- SingleDelete on Primary Key may
+# cause extra rows than Secondary Keys
+
+call mtr.add_suppression("Recovery from master pos");
+
+connection master;
+--disable_warnings
+drop table if exists r1;
+--enable_warnings
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+
+sync_slave_with_master;
+connection slave;
+set global rocksdb_force_flush_memtable_now=1;
+--let slave_data_dir= query_get_value(SELECT @@DATADIR, @@DATADIR, 1)
+--let slave_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1)
+--let slave_pid_file= query_get_value(SELECT @@pid_file, @@pid_file, 1)
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+
+--write_file $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+#!/bin/bash
+
+F=$slave_data_dir/$slave_binlog_file
+SIZE=`stat -c %s $F`
+NEW_SIZE=`expr $SIZE - 100`
+truncate -s $NEW_SIZE $F
+rc=$?
+if [[ $rc != 0 ]]; then
+ exit 1
+fi
+
+kill -9 `head -1 $slave_pid_file`
+
+exit 0
+EOF
+--chmod 0755 $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+--exec $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+
+# Crash recovery (losing some binlogs) with slave_use_idempotent_for_recovery may
+# replay same transactions with slave_exec_mode=idempotent implicitly enabled.
+# On slave, the last insert is converted to update with the same key.
+# It should be treated as SD and Put (same as singledelete_idempotent_table.test).
+
+--source include/rpl_start_server.inc
+--source include/start_slave.inc
+connection master;
+sync_slave_with_master;
+connection slave;
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary) where id1=1 and id2=1000;
+select id2 from r1 force index (i) where id1=1 and id2=1000;
+
+connection master;
+drop table r1;
+
+--remove_file $MYSQL_TMP_DIR/truncate_tail_binlog.sh
+--source include/rpl_end.inc
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf
new file mode 100644
index 00000000000..ad4894f5b38
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf
@@ -0,0 +1,15 @@
+!include suite/rpl/my.cnf
+
+[mysqld.1]
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+
+[mysqld.2]
+relay_log_recovery=1
+relay_log_info_repository=FILE
+log_slave_updates
+gtid_mode=ON
+enforce_gtid_consistency=ON
+rbr_idempotent_tables='r1'
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test
new file mode 100644
index 00000000000..23d335d6b57
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test
@@ -0,0 +1,44 @@
+
+--source include/have_binlog_format_row.inc
+--source include/have_rocksdb.inc
+--source include/master-slave.inc
+--source include/have_gtid.inc
+--source include/not_valgrind.inc
+
+# This is a test case for issue#655 -- SingleDelete on Primary Key may
+# cause extra rows than Secondary Keys
+
+connection master;
+--disable_warnings
+drop table if exists r1;
+--enable_warnings
+create table r1 (id1 int, id2 int, primary key (id1, id2), index i (id2)) engine=rocksdb;
+insert into r1 values (1, 1000);
+set sql_log_bin=0;
+delete from r1 where id1=1 and id2=1000;
+set sql_log_bin=1;
+
+sync_slave_with_master;
+connection slave;
+set global rocksdb_force_flush_memtable_now=1;
+
+connection master;
+# same key insert on slave. Since slave sets rbr_idempotent_tables, the insert
+# is converted to update with the same key. MyRocks should call SD and Put for the key
+insert into r1 values (1, 1000);
+sync_slave_with_master;
+
+connection slave;
+delete r1 from r1 force index (i) where id2=1000;
+select id1,id2 from r1 force index (primary);
+select id2 from r1 force index (i);
+set global rocksdb_compact_cf='default';
+select id1,id2 from r1 force index (primary);
+select id2 from r1 force index (i);
+
+connection master;
+drop table r1;
+
+--source include/rpl_end.inc
+
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
index 1e9b0a9d3bb..20c2d025e0c 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf
@@ -8,3 +8,4 @@ default-storage-engine=rocksdb
sql-mode=NO_ENGINE_SUBSTITUTION
explicit-defaults-for-timestamp=1
loose-rocksdb_lock_wait_timeout=1
+loose-rocksdb_force_compute_memtable_stats_cachetime=0
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result
index 9f21825d262..159d6a983c8 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result
@@ -9,7 +9,5 @@ There should be *no* long test name listed below:
select variable_name as `There should be *no* variables listed below:` from t2
left join t1 on variable_name=test_name where test_name is null ORDER BY variable_name;
There should be *no* variables listed below:
-ROCKSDB_ENABLE_2PC
-ROCKSDB_ENABLE_2PC
drop table t1;
drop table t2;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result
index 93ec1aec407..3b174fbbc63 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result
@@ -1,64 +1,7 @@
-CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO valid_values VALUES(1);
-INSERT INTO valid_values VALUES(0);
-INSERT INTO valid_values VALUES('on');
-CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO invalid_values VALUES('\'aaa\'');
-INSERT INTO invalid_values VALUES('\'bbb\'');
SET @start_global_value = @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
SELECT @start_global_value;
@start_global_value
0
-'# Setting to valid values in global scope#'
-"Trying to set variable @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to 1"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = 1;
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-1
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = DEFAULT;
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-0
-"Trying to set variable @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to 0"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = 0;
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-0
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = DEFAULT;
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-0
-"Trying to set variable @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to on"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = on;
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-1
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = DEFAULT;
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-0
-"Trying to set variable @@session.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to 444. It should fail because it is not session."
-SET @@session.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = 444;
-ERROR HY000: Variable 'rocksdb_allow_concurrent_memtable_write' is a GLOBAL variable and should be set with SET GLOBAL
-'# Testing with invalid values in global scope #'
-"Trying to set variable @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to 'aaa'"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = 'aaa';
-Got one of the listed errors
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-0
-"Trying to set variable @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to 'bbb'"
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = 'bbb';
-Got one of the listed errors
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-0
-SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = @start_global_value;
-SELECT @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE;
-@@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
-0
-DROP TABLE valid_values;
-DROP TABLE invalid_values;
+"Trying to set variable @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE = 444;
+ERROR HY000: Variable 'rocksdb_allow_concurrent_memtable_write' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_background_sync_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_background_sync_basic.result
deleted file mode 100644
index 8998bfee64d..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_background_sync_basic.result
+++ /dev/null
@@ -1,68 +0,0 @@
-CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO valid_values VALUES(1);
-INSERT INTO valid_values VALUES(0);
-INSERT INTO valid_values VALUES('on');
-INSERT INTO valid_values VALUES('off');
-CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO invalid_values VALUES('\'aaa\'');
-SET @start_global_value = @@global.ROCKSDB_BACKGROUND_SYNC;
-SELECT @start_global_value;
-@start_global_value
-0
-'# Setting to valid values in global scope#'
-"Trying to set variable @@global.ROCKSDB_BACKGROUND_SYNC to 1"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = 1;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-1
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = DEFAULT;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-"Trying to set variable @@global.ROCKSDB_BACKGROUND_SYNC to 0"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = 0;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = DEFAULT;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-"Trying to set variable @@global.ROCKSDB_BACKGROUND_SYNC to on"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = on;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-1
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = DEFAULT;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-"Trying to set variable @@global.ROCKSDB_BACKGROUND_SYNC to off"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = off;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = DEFAULT;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-"Trying to set variable @@session.ROCKSDB_BACKGROUND_SYNC to 444. It should fail because it is not session."
-SET @@session.ROCKSDB_BACKGROUND_SYNC = 444;
-ERROR HY000: Variable 'rocksdb_background_sync' is a GLOBAL variable and should be set with SET GLOBAL
-'# Testing with invalid values in global scope #'
-"Trying to set variable @@global.ROCKSDB_BACKGROUND_SYNC to 'aaa'"
-SET @@global.ROCKSDB_BACKGROUND_SYNC = 'aaa';
-Got one of the listed errors
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-SET @@global.ROCKSDB_BACKGROUND_SYNC = @start_global_value;
-SELECT @@global.ROCKSDB_BACKGROUND_SYNC;
-@@global.ROCKSDB_BACKGROUND_SYNC
-0
-DROP TABLE valid_values;
-DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_base_background_compactions_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_base_background_compactions_basic.result
deleted file mode 100644
index 09acaada0c6..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_base_background_compactions_basic.result
+++ /dev/null
@@ -1,7 +0,0 @@
-SET @start_global_value = @@global.ROCKSDB_BASE_BACKGROUND_COMPACTIONS;
-SELECT @start_global_value;
-@start_global_value
-1
-"Trying to set variable @@global.ROCKSDB_BASE_BACKGROUND_COMPACTIONS to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_BASE_BACKGROUND_COMPACTIONS = 444;
-ERROR HY000: Variable 'rocksdb_base_background_compactions' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
new file mode 100644
index 00000000000..a59ba561181
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result
@@ -0,0 +1,100 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 1"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 1;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 0"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 0;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to on"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = on;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = DEFAULT;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'aaa'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+"Trying to set variable @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED to 'bbb'"
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_global_value;
+SELECT @@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@global.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+SET @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED = @start_session_value;
+SELECT @@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED;
+@@session.ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
new file mode 100644
index 00000000000..11d4f2363f6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_concurrent_prepare_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444;
+ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result
index 35e4d252e11..ba280a32ab2 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result
@@ -12,4 +12,4 @@ SET @@global.ROCKSDB_CREATE_CHECKPOINT = DEFAULT;
SET @@session.ROCKSDB_CREATE_CHECKPOINT = 444;
ERROR HY000: Variable 'rocksdb_create_checkpoint' is a GLOBAL variable and should be set with SET GLOBAL
SET @@global.ROCKSDB_CREATE_CHECKPOINT = @start_value;
-ERROR HY000: RocksDB: Failed to create checkpoint directory. status 5 IO error: .tmp: No such file or directory
+ERROR HY000: Status error 5 received from RocksDB: IO error: While renaming a file to : .tmp: No such file or directory
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
new file mode 100644
index 00000000000..6f05268745d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result
@@ -0,0 +1,79 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_global_value;
+@start_global_value
+50
+SET @start_session_value = @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+SELECT @start_session_value;
+@start_session_value
+50
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 100"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 100;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+100
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH to 2"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = 2;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+2
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = DEFAULT;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to 'aaa'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+"Trying to set variable @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH to '123'"
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_global_value;
+SELECT @@global.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@global.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+SET @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH = @start_session_value;
+SELECT @@session.ROCKSDB_DEADLOCK_DETECT_DEPTH;
+@@session.ROCKSDB_DEADLOCK_DETECT_DEPTH
+50
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
new file mode 100644
index 00000000000..1d8eb721c1b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 1"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 1;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 0"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 0;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to on"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = on;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_IGNORE_PK = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_ignore_pk' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK to 'bbb'"
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+SET @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_IGNORE_PK;
+@@global.ROCKSDB_DEBUG_TTL_IGNORE_PK
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result
new file mode 100644
index 00000000000..bbc46001817
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to 2400"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = 2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to -2400"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = -2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+-2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_read_filter_ts' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+SET @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS;
+@@global.ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result
new file mode 100644
index 00000000000..347ba9a0b3d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_REC_TS to 2400"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = 2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_REC_TS to -2400"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = -2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+-2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_REC_TS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_REC_TS = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_rec_ts' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_REC_TS to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+SET @@global.ROCKSDB_DEBUG_TTL_REC_TS = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_REC_TS;
+@@global.ROCKSDB_DEBUG_TTL_REC_TS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result
new file mode 100644
index 00000000000..03a937ef218
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to 2400"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = 2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to -2400"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = -2400;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+-2400
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = DEFAULT;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+"Trying to set variable @@session.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = 444;
+ERROR HY000: Variable 'rocksdb_debug_ttl_snapshot_ts' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS to 'aaa'"
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+SET @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS = @start_global_value;
+SELECT @@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS;
+@@global.ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result
index 3eefd822e69..13749e1c220 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result
@@ -11,7 +11,7 @@ INSERT INTO invalid_values VALUES('\'484436\'');
SET @start_global_value = @@global.ROCKSDB_DELAYED_WRITE_RATE;
SELECT @start_global_value;
@start_global_value
-16777216
+0
'# Setting to valid values in global scope#'
"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 100"
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 100;
@@ -22,7 +22,7 @@ SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = DEFAULT;
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 1"
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 1;
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@ -32,7 +32,7 @@ SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = DEFAULT;
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 0"
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 0;
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@ -42,7 +42,7 @@ SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = DEFAULT;
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
"Trying to set variable @@session.ROCKSDB_DELAYED_WRITE_RATE to 444. It should fail because it is not session."
SET @@session.ROCKSDB_DELAYED_WRITE_RATE = 444;
ERROR HY000: Variable 'rocksdb_delayed_write_rate' is a GLOBAL variable and should be set with SET GLOBAL
@@ -52,34 +52,34 @@ SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 'aaa';
Got one of the listed errors
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to 'bbb'"
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = 'bbb';
Got one of the listed errors
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to '-1'"
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = '-1';
Got one of the listed errors
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to '101'"
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = '101';
Got one of the listed errors
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
"Trying to set variable @@global.ROCKSDB_DELAYED_WRITE_RATE to '484436'"
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = '484436';
Got one of the listed errors
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
SET @@global.ROCKSDB_DELAYED_WRITE_RATE = @start_global_value;
SELECT @@global.ROCKSDB_DELAYED_WRITE_RATE;
@@global.ROCKSDB_DELAYED_WRITE_RATE
-16777216
+0
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_disable_2pc_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result
index 686f8bcd39a..686f8bcd39a 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_disable_2pc_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result
index f12e39fff93..a63383a4d59 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result
@@ -1,7 +1,7 @@
SET @start_global_value = @@global.ROCKSDB_ENABLE_THREAD_TRACKING;
SELECT @start_global_value;
@start_global_value
-0
+1
"Trying to set variable @@global.ROCKSDB_ENABLE_THREAD_TRACKING to 444. It should fail because it is readonly."
SET @@global.ROCKSDB_ENABLE_THREAD_TRACKING = 444;
ERROR HY000: Variable 'rocksdb_enable_thread_tracking' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result
new file mode 100644
index 00000000000..1f569235b63
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_TTL;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 1"
+SET @@global.ROCKSDB_ENABLE_TTL = 1;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 0"
+SET @@global.ROCKSDB_ENABLE_TTL = 0;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to on"
+SET @@global.ROCKSDB_ENABLE_TTL = on;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@session.ROCKSDB_ENABLE_TTL to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ENABLE_TTL = 444;
+ERROR HY000: Variable 'rocksdb_enable_ttl' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 'aaa'"
+SET @@global.ROCKSDB_ENABLE_TTL = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL to 'bbb'"
+SET @@global.ROCKSDB_ENABLE_TTL = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+SET @@global.ROCKSDB_ENABLE_TTL = @start_global_value;
+SELECT @@global.ROCKSDB_ENABLE_TTL;
+@@global.ROCKSDB_ENABLE_TTL
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result
new file mode 100644
index 00000000000..005c15e168b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+SELECT @start_global_value;
+@start_global_value
+1
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 1"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 1;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 0"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 0;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to on"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = on;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = DEFAULT;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@session.ROCKSDB_ENABLE_TTL_READ_FILTERING to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_ENABLE_TTL_READ_FILTERING = 444;
+ERROR HY000: Variable 'rocksdb_enable_ttl_read_filtering' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 'aaa'"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+"Trying to set variable @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING to 'bbb'"
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+SET @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING = @start_global_value;
+SELECT @@global.ROCKSDB_ENABLE_TTL_READ_FILTERING;
+@@global.ROCKSDB_ENABLE_TTL_READ_FILTERING
+1
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result
index c93152c4756..37107be469f 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result
@@ -1,64 +1,7 @@
-CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO valid_values VALUES(1);
-INSERT INTO valid_values VALUES(0);
-INSERT INTO valid_values VALUES('on');
-CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO invalid_values VALUES('\'aaa\'');
-INSERT INTO invalid_values VALUES('\'bbb\'');
SET @start_global_value = @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
SELECT @start_global_value;
@start_global_value
0
-'# Setting to valid values in global scope#'
-"Trying to set variable @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to 1"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = 1;
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-1
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = DEFAULT;
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-0
-"Trying to set variable @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to 0"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = 0;
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-0
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = DEFAULT;
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-0
-"Trying to set variable @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to on"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = on;
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-1
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = DEFAULT;
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-0
-"Trying to set variable @@session.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to 444. It should fail because it is not session."
-SET @@session.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = 444;
-ERROR HY000: Variable 'rocksdb_enable_write_thread_adaptive_yield' is a GLOBAL variable and should be set with SET GLOBAL
-'# Testing with invalid values in global scope #'
-"Trying to set variable @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to 'aaa'"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = 'aaa';
-Got one of the listed errors
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-0
-"Trying to set variable @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to 'bbb'"
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = 'bbb';
-Got one of the listed errors
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-0
-SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = @start_global_value;
-SELECT @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD;
-@@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
-0
-DROP TABLE valid_values;
-DROP TABLE invalid_values;
+"Trying to set variable @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD = 444;
+ERROR HY000: Variable 'rocksdb_enable_write_thread_adaptive_yield' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result
index 19be4e3ad5d..b8fe837d2e6 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result
@@ -8,10 +8,6 @@ SET @start_global_value = @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
SELECT @start_global_value;
@start_global_value
1
-SET @start_session_value = @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-SELECT @start_session_value;
-@start_session_value
-1
'# Setting to valid values in global scope#'
"Trying to set variable @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 2"
SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
@@ -43,37 +39,9 @@ SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;
SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
1
-'# Setting to valid values in session scope#'
-"Trying to set variable @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 2"
-SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 2;
-SELECT @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-@@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
-2
-"Setting the session scope variable back to default"
-SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;
-SELECT @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-@@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
-1
-"Trying to set variable @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 1"
-SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 1;
-SELECT @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-@@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
-1
-"Setting the session scope variable back to default"
-SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;
-SELECT @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-@@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
-1
-"Trying to set variable @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 0"
-SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 0;
-SELECT @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-@@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
-0
-"Setting the session scope variable back to default"
-SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = DEFAULT;
-SELECT @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-@@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
-1
+"Trying to set variable @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 444;
+ERROR HY000: Variable 'rocksdb_flush_log_at_trx_commit' is a GLOBAL variable and should be set with SET GLOBAL
'# Testing with invalid values in global scope #'
"Trying to set variable @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT to 'aaa'"
SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = 'aaa';
@@ -85,9 +53,5 @@ SET @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = @start_global_value;
SELECT @@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
@@global.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
1
-SET @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT = @start_session_value;
-SELECT @@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT;
-@@session.ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
-1
DROP TABLE valid_values;
DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
index a1c4d3caaa4..90fd829e7c3 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result
@@ -1,7 +1,7 @@
DROP TABLE IF EXISTS t;
CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
-SET @ORIG_PAUSE_BACKGROUND_WORK = @@rocksdb_force_compute_memtable_stats;
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
set global rocksdb_force_flush_memtable_now = true;
INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
set global rocksdb_force_compute_memtable_stats=0;
@@ -12,4 +12,4 @@ select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' e
case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end
true
DROP TABLE t;
-set global rocksdb_force_compute_memtable_stats = @ORIG_PAUSE_BACKGROUND_WORK;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
new file mode 100644
index 00000000000..50e06b5bacb
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result
@@ -0,0 +1,68 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 0"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 0;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1024"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1024;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1024
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 1073741824"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 1073741824;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+1073741824
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+"Trying to set variable @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 444;
+ERROR HY000: Variable 'rocksdb_force_compute_memtable_stats_cachetime' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME to 'aaa'"
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+60000000
+SET @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME;
+@@global.ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result
new file mode 100644
index 00000000000..68cfeb07fc7
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result
@@ -0,0 +1,50 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+SET @start_global_value = @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to 1"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = 1;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to 0"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = 0;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Trying to set variable @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to on"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = on;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = DEFAULT;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+"Trying to set variable @@session.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = 444;
+ERROR HY000: Variable 'rocksdb_force_flush_memtable_and_lzero_now' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+SET @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW = @start_global_value;
+SELECT @@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW;
+@@global.ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result
new file mode 100644
index 00000000000..0917a3970f4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result
@@ -0,0 +1,86 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(10);
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(42);
+INSERT INTO valid_values VALUES(142);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 10"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 10;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+10
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 100"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 100;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 0"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 0;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 42"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 42;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+42
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 142"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 142;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+142
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = DEFAULT;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@session.ROCKSDB_IO_WRITE_TIMEOUT to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_IO_WRITE_TIMEOUT = 444;
+ERROR HY000: Variable 'rocksdb_io_write_timeout' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 'aaa'"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+"Trying to set variable @@global.ROCKSDB_IO_WRITE_TIMEOUT to 'bbb'"
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+SET @@global.ROCKSDB_IO_WRITE_TIMEOUT = @start_global_value;
+SELECT @@global.ROCKSDB_IO_WRITE_TIMEOUT;
+@@global.ROCKSDB_IO_WRITE_TIMEOUT
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
new file mode 100644
index 00000000000..89697683d1c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result
@@ -0,0 +1,64 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_LARGE_PREFIX;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 1"
+SET @@global.ROCKSDB_LARGE_PREFIX = 1;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 0"
+SET @@global.ROCKSDB_LARGE_PREFIX = 0;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to on"
+SET @@global.ROCKSDB_LARGE_PREFIX = on;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_LARGE_PREFIX = DEFAULT;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@session.ROCKSDB_LARGE_PREFIX to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_LARGE_PREFIX = 444;
+ERROR HY000: Variable 'rocksdb_large_prefix' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'aaa'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+"Trying to set variable @@global.ROCKSDB_LARGE_PREFIX to 'bbb'"
+SET @@global.ROCKSDB_LARGE_PREFIX = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+SET @@global.ROCKSDB_LARGE_PREFIX = @start_global_value;
+SELECT @@global.ROCKSDB_LARGE_PREFIX;
+@@global.ROCKSDB_LARGE_PREFIX
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
new file mode 100644
index 00000000000..9b176263a23
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_MANUAL_WAL_FLUSH;
+SELECT @start_global_value;
+@start_global_value
+1
+"Trying to set variable @@global.ROCKSDB_MANUAL_WAL_FLUSH to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_MANUAL_WAL_FLUSH = 444;
+ERROR HY000: Variable 'rocksdb_manual_wal_flush' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result
deleted file mode 100644
index 714f2101127..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_compactions_basic.result
+++ /dev/null
@@ -1,46 +0,0 @@
-CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO valid_values VALUES(1);
-INSERT INTO valid_values VALUES(64);
-CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO invalid_values VALUES('\'abc\'');
-SET @start_global_value = @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS;
-SELECT @start_global_value;
-@start_global_value
-1
-'# Setting to valid values in global scope#'
-"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 1"
-SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 1;
-SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS;
-@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS
-1
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = DEFAULT;
-SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS;
-@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS
-1
-"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 64"
-SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 64;
-SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS;
-@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS
-64
-"Setting the global scope variable back to default"
-SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = DEFAULT;
-SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS;
-@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS
-1
-"Trying to set variable @@session.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 444. It should fail because it is not session."
-SET @@session.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 444;
-ERROR HY000: Variable 'rocksdb_max_background_compactions' is a GLOBAL variable and should be set with SET GLOBAL
-'# Testing with invalid values in global scope #'
-"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS to 'abc'"
-SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = 'abc';
-Got one of the listed errors
-SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS;
-@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS
-1
-SET @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS = @start_global_value;
-SELECT @@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS;
-@@global.ROCKSDB_MAX_BACKGROUND_COMPACTIONS
-1
-DROP TABLE valid_values;
-DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_flushes_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_flushes_basic.result
deleted file mode 100644
index ff8f2b5997b..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_flushes_basic.result
+++ /dev/null
@@ -1,7 +0,0 @@
-SET @start_global_value = @@global.ROCKSDB_MAX_BACKGROUND_FLUSHES;
-SELECT @start_global_value;
-@start_global_value
-1
-"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_FLUSHES to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_MAX_BACKGROUND_FLUSHES = 444;
-ERROR HY000: Variable 'rocksdb_max_background_flushes' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result
new file mode 100644
index 00000000000..88e6d21c3ec
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result
@@ -0,0 +1,46 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(64);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'abc\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+SELECT @start_global_value;
+@start_global_value
+2
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_JOBS to 1"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = 1;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_JOBS to 64"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = 64;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+64
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+"Trying to set variable @@session.ROCKSDB_MAX_BACKGROUND_JOBS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_MAX_BACKGROUND_JOBS = 444;
+ERROR HY000: Variable 'rocksdb_max_background_jobs' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_BACKGROUND_JOBS to 'abc'"
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = 'abc';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+SET @@global.ROCKSDB_MAX_BACKGROUND_JOBS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_BACKGROUND_JOBS;
+@@global.ROCKSDB_MAX_BACKGROUND_JOBS
+2
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
new file mode 100644
index 00000000000..74dbdb4288c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result
@@ -0,0 +1,53 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+SET @start_global_value = @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+SELECT @start_global_value;
+@start_global_value
+5
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 100"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 100;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 1"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 1;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = DEFAULT;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@session.ROCKSDB_MAX_LATEST_DEADLOCKS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_MAX_LATEST_DEADLOCKS = 444;
+ERROR HY000: Variable 'rocksdb_max_latest_deadlocks' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to 'aaa'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+"Trying to set variable @@global.ROCKSDB_MAX_LATEST_DEADLOCKS to '123'"
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = '123';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+SET @@global.ROCKSDB_MAX_LATEST_DEADLOCKS = @start_global_value;
+SELECT @@global.ROCKSDB_MAX_LATEST_DEADLOCKS;
+@@global.ROCKSDB_MAX_LATEST_DEADLOCKS
+5
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
new file mode 100644
index 00000000000..277de716d70
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result
@@ -0,0 +1,93 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+SET @start_global_value = @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_global_value;
+@start_global_value
+0
+SET @start_session_value = @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+SELECT @start_session_value;
+@start_session_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Setting to valid values in session scope#'
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 1"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 1;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+1
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 0"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 0;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Setting the session scope variable back to default"
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = DEFAULT;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'aaa'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to 'bbb'"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+"Trying to set variable @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS to on"
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = on;
+Got one of the listed errors
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_global_value;
+SELECT @@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@global.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+SET @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS = @start_session_value;
+SELECT @@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS;
+@@session.ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+include/assert.inc [Alter should have taken at least 10 seconds]
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result
new file mode 100644
index 00000000000..d585e73489c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result
@@ -0,0 +1,97 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_RESET_STATS;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 1"
+SET @@global.ROCKSDB_RESET_STATS = 1;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 0"
+SET @@global.ROCKSDB_RESET_STATS = 0;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to on"
+SET @@global.ROCKSDB_RESET_STATS = on;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to off"
+SET @@global.ROCKSDB_RESET_STATS = off;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to true"
+SET @@global.ROCKSDB_RESET_STATS = true;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to false"
+SET @@global.ROCKSDB_RESET_STATS = false;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_RESET_STATS = DEFAULT;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@session.ROCKSDB_RESET_STATS to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_RESET_STATS = 444;
+ERROR HY000: Variable 'rocksdb_reset_stats' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 'aaa'"
+SET @@global.ROCKSDB_RESET_STATS = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+"Trying to set variable @@global.ROCKSDB_RESET_STATS to 'bbb'"
+SET @@global.ROCKSDB_RESET_STATS = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+SET @@global.ROCKSDB_RESET_STATS = @start_global_value;
+SELECT @@global.ROCKSDB_RESET_STATS;
+@@global.ROCKSDB_RESET_STATS
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
new file mode 100644
index 00000000000..ef4c619457b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_SIM_CACHE_SIZE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_SIM_CACHE_SIZE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_SIM_CACHE_SIZE = 444;
+ERROR HY000: Variable 'rocksdb_sim_cache_size' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result
new file mode 100644
index 00000000000..a714f1c2fdc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result
@@ -0,0 +1,85 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+SET @start_global_value = @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+SELECT @start_global_value;
+@start_global_value
+0
+'# Setting to valid values in global scope#'
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 100"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 100;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+100
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = DEFAULT;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 1"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 1;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+1
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = DEFAULT;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 0"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 0;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Setting the global scope variable back to default"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = DEFAULT;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@session.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 444. It should fail because it is not session."
+SET @@session.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 444;
+ERROR HY000: Variable 'rocksdb_sst_mgr_rate_bytes_per_sec' is a GLOBAL variable and should be set with SET GLOBAL
+'# Testing with invalid values in global scope #'
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 'aaa'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 'aaa';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to 'bbb'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = 'bbb';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to '-1'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = '-1';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to '101'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = '101';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+"Trying to set variable @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC to '484436'"
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = '484436';
+Got one of the listed errors
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+SET @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC = @start_global_value;
+SELECT @@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC;
+@@global.ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+0
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
new file mode 100644
index 00000000000..5ad5394db29
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result
@@ -0,0 +1,108 @@
+call mtr.add_suppression("MyRocks: NULL is not a valid option for updates to column family settings.");
+call mtr.add_suppression("Invalid cf options, '=' expected *");
+call mtr.add_suppression("MyRocks: failed to parse the updated column family options = *");
+call mtr.add_suppression("Invalid cf config for default in override options *");
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE `t1` (
+`col1` bigint(20) NOT NULL,
+`col2` varbinary(64) NOT NULL,
+`col3` varbinary(256) NOT NULL,
+`col4` bigint(20) NOT NULL,
+`col5` mediumblob NOT NULL,
+PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=cf1;custom_p1_cfname=cf2',
+UNIQUE KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=cf3'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+PARTITION BY LIST COLUMNS (`col2`) (
+PARTITION custom_p0 VALUES IN (0x12345),
+PARTITION custom_p1 VALUES IN (0x23456),
+PARTITION custom_p2 VALUES IN (0x34567),
+PARTITION custom_p3 VALUES IN (0x45678),
+PARTITION custom_p4 VALUES IN (0x56789),
+PARTITION custom_p5 VALUES IN (0x6789A),
+PARTITION custom_p6 VALUES IN (0x789AB),
+PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+USE information_schema;
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+SET @@global.rocksdb_update_cf_options = 'aaaaa';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+default WRITE_BUFFER_SIZE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+default TARGET_FILE_SIZE_BASE 67108864
+SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+default={write_buffer_size=8m;target_file_size_base=2m};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+default WRITE_BUFFER_SIZE 8388608
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+default TARGET_FILE_SIZE_BASE 2097152
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf1 WRITE_BUFFER_SIZE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf1 TARGET_FILE_SIZE_BASE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf2 WRITE_BUFFER_SIZE 67108864
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+CF_NAME OPTION_TYPE VALUE
+cf2 MAX_BYTES_FOR_LEVEL_MULTIPLIER 10.000000
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf3 TARGET_FILE_SIZE_BASE 67108864
+SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=8m;target_file_size_base=2m};cf2={write_buffer_size=16m;max_bytes_for_level_multiplier=8};cf3={target_file_size_base=4m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf1={write_buffer_size=8m;target_file_size_base=2m};cf2={write_buffer_size=16m;max_bytes_for_level_multiplier=8};cf3={target_file_size_base=4m};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf1 WRITE_BUFFER_SIZE 8388608
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf1 TARGET_FILE_SIZE_BASE 2097152
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+CF_NAME OPTION_TYPE VALUE
+cf2 WRITE_BUFFER_SIZE 16777216
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+CF_NAME OPTION_TYPE VALUE
+cf2 MAX_BYTES_FOR_LEVEL_MULTIPLIER 8.000000
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf3 TARGET_FILE_SIZE_BASE 4194304
+SET @@global.rocksdb_update_cf_options = 'cf3={target_file_size_base=24m};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf3={target_file_size_base=24m};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf3 TARGET_FILE_SIZE_BASE 25165824
+SET @@global.rocksdb_update_cf_options = 'cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+CF_NAME OPTION_TYPE VALUE
+cf1 TARGET_FILE_SIZE_BASE 25165824
+SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+SELECT @@global.rocksdb_update_cf_options;
+@@global.rocksdb_update_cf_options
+NULL
+USE test;
+DROP TABLE t1;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
new file mode 100644
index 00000000000..f23d1889027
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result
@@ -0,0 +1,19 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+SET @start_global_value = @@global.ROCKSDB_USE_CLOCK_CACHE;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_CLOCK_CACHE to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_CLOCK_CACHE = 444;
+ERROR HY000: Variable 'rocksdb_use_clock_cache' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result
new file mode 100644
index 00000000000..219cdb7319c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result
@@ -0,0 +1,7 @@
+SET @start_global_value = @@global.ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION;
+SELECT @start_global_value;
+@start_global_value
+0
+"Trying to set variable @@global.ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION = 444;
+ERROR HY000: Variable 'rocksdb_use_direct_io_for_flush_and_compaction' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_writes_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_writes_basic.result
deleted file mode 100644
index 4cc787e4586..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_writes_basic.result
+++ /dev/null
@@ -1,7 +0,0 @@
-SET @start_global_value = @@global.ROCKSDB_USE_DIRECT_WRITES;
-SELECT @start_global_value;
-@start_global_value
-0
-"Trying to set variable @@global.ROCKSDB_USE_DIRECT_WRITES to 444. It should fail because it is readonly."
-SET @@global.ROCKSDB_USE_DIRECT_WRITES = 444;
-ERROR HY000: Variable 'rocksdb_use_direct_writes' is a read only variable
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result
new file mode 100644
index 00000000000..af4da8177d4
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result
@@ -0,0 +1,15 @@
+create table t (i int);
+insert into t values (1), (2), (3), (4), (5);
+set session rocksdb_write_batch_max_bytes = 1000;
+insert into t values (1), (2), (3), (4), (5);
+set session rocksdb_write_batch_max_bytes = 10;
+insert into t values (1), (2), (3), (4), (5);
+ERROR HY000: Status error 10 received from RocksDB: Operation aborted: Memory limit reached
+set session rocksdb_write_batch_max_bytes = 0;
+insert into t values (1), (2), (3), (4), (5);
+set session rocksdb_write_batch_max_bytes = 10;
+begin;
+insert into t values (1), (2), (3), (4), (5);
+ERROR HY000: Status error 10 received from RocksDB: Operation aborted: Memory limit reached
+rollback;
+drop table t;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test
index fefd9e39af2..fc700357155 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test
@@ -1,3 +1,4 @@
+--source include/have_rocksdb.inc
--source include/not_embedded.inc
--source include/not_threadpool.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test
index 4f618609223..5d4e3c05f9d 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test
@@ -1,18 +1,5 @@
--source include/have_rocksdb.inc
-
-CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO valid_values VALUES(1);
-INSERT INTO valid_values VALUES(0);
-INSERT INTO valid_values VALUES('on');
-
-CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO invalid_values VALUES('\'aaa\'');
-INSERT INTO invalid_values VALUES('\'bbb\'');
-
--let $sys_var=ROCKSDB_ALLOW_CONCURRENT_MEMTABLE_WRITE
---let $read_only=0
+--let $read_only=1
--let $session=0
--source include/rocksdb_sys_var.inc
-
-DROP TABLE valid_values;
-DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_base_background_compactions_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_base_background_compactions_basic.test
deleted file mode 100644
index 9f001ce103e..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_base_background_compactions_basic.test
+++ /dev/null
@@ -1,7 +0,0 @@
---source include/have_rocksdb.inc
-
---let $sys_var=ROCKSDB_BASE_BACKGROUND_COMPACTIONS
---let $read_only=1
---let $session=0
---source include/rocksdb_sys_var.inc
-
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
new file mode 100644
index 00000000000..e57396e0fdc
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_BULK_LOAD_ALLOW_UNSORTED
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
new file mode 100644
index 00000000000..451653fe769
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_concurrent_prepare_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_CONCURRENT_PREPARE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test
index 2850c7a1a38..a53df21524f 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test
@@ -21,7 +21,7 @@
# Set back to original value
# validate that DEFAULT causes failure in creating checkpoint since
# DEFAULT == ''
---error ER_UNKNOWN_ERROR
+--error ER_RDB_STATUS_GENERAL
--eval SET @@global.ROCKSDB_CREATE_CHECKPOINT = @start_value
# clean up
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
new file mode 100644
index 00000000000..cab72a11e18
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_DEADLOCK_DETECT_DEPTH
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
new file mode 100644
index 00000000000..8ad071e131b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_IGNORE_PK
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test
new file mode 100644
index 00000000000..c3837ff1454
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_READ_FILTER_TS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test
new file mode 100644
index 00000000000..14c3e3d30aa
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_REC_TS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test
new file mode 100644
index 00000000000..af507fbe7db
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(2400);
+INSERT INTO valid_values VALUES(-2400);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_DEBUG_TTL_SNAPSHOT_TS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_disable_2pc_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test
index 0a38895c35a..0a38895c35a 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_disable_2pc_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_background_sync_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test
index d8efc082266..209530bd899 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_background_sync_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test
@@ -4,12 +4,12 @@ CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(0);
INSERT INTO valid_values VALUES('on');
-INSERT INTO valid_values VALUES('off');
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
---let $sys_var=ROCKSDB_BACKGROUND_SYNC
+--let $sys_var=ROCKSDB_ENABLE_TTL
--let $read_only=0
--let $session=0
--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test
new file mode 100644
index 00000000000..cc034ed47d6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_ENABLE_TTL_READ_FILTERING
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test
index 1904dd2cd69..b5c844a9c93 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test
@@ -1,18 +1,5 @@
--source include/have_rocksdb.inc
-
-CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO valid_values VALUES(1);
-INSERT INTO valid_values VALUES(0);
-INSERT INTO valid_values VALUES('on');
-
-CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
-INSERT INTO invalid_values VALUES('\'aaa\'');
-INSERT INTO invalid_values VALUES('\'bbb\'');
-
--let $sys_var=ROCKSDB_ENABLE_WRITE_THREAD_ADAPTIVE_YIELD
---let $read_only=0
+--let $read_only=1
--let $session=0
--source include/rocksdb_sys_var.inc
-
-DROP TABLE valid_values;
-DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test
index 3a8ac014c7d..02c533dc7c6 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test
@@ -10,9 +10,8 @@ INSERT INTO invalid_values VALUES('\'aaa\'');
--let $sys_var=ROCKSDB_FLUSH_LOG_AT_TRX_COMMIT
--let $read_only=0
---let $session=1
+--let $session=0
--source include/rocksdb_sys_var.inc
DROP TABLE valid_values;
DROP TABLE invalid_values;
-
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
index 3a0d7f63938..318ae1ee598 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test
@@ -6,7 +6,7 @@ DROP TABLE IF EXISTS t;
CREATE TABLE t (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb;
INSERT INTO t (a,b) VALUES (1,'bar'),(2,'foo');
-SET @ORIG_PAUSE_BACKGROUND_WORK = @@rocksdb_force_compute_memtable_stats;
+SET @ORIG_FORCE_COMPUTE_MEMTABLE_STATS = @@rocksdb_force_compute_memtable_stats;
set global rocksdb_force_flush_memtable_now = true;
INSERT INTO t (a,b) VALUES (3,'dead'),(4,'beef'),(5,'a'),(6,'bbb'),(7,'c'),(8,'d');
@@ -20,4 +20,4 @@ SELECT TABLE_ROWS INTO @ROWS_INCLUDE_MEMTABLE FROM information_schema.TABLES WHE
select case when @ROWS_INCLUDE_MEMTABLE-@ROWS_EXCLUDE_MEMTABLE > 0 then 'true' else 'false' end;
DROP TABLE t;
-set global rocksdb_force_compute_memtable_stats = @ORIG_PAUSE_BACKGROUND_WORK;
+set global rocksdb_force_compute_memtable_stats = @ORIG_FORCE_COMPUTE_MEMTABLE_STATS;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
new file mode 100644
index 00000000000..20180ec16a9
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+INSERT INTO valid_values VALUES(1024 * 1024 * 1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_FORCE_COMPUTE_MEMTABLE_STATS_CACHETIME
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test
new file mode 100644
index 00000000000..5eeac501040
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+
+--let $sys_var=ROCKSDB_FORCE_FLUSH_MEMTABLE_AND_LZERO_NOW
+--let $read_only=0
+--let $session=0
+--let $sticky=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test
new file mode 100644
index 00000000000..4433eb2632d
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(10);
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES(42);
+INSERT INTO valid_values VALUES(142);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_IO_WRITE_TIMEOUT
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
new file mode 100644
index 00000000000..5ed3c74131e
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test
@@ -0,0 +1,18 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_LARGE_PREFIX
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
new file mode 100644
index 00000000000..3e01722d5ea
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(1024);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_MANUAL_WAL_FLUSH
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_flushes_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_flushes_basic.test
deleted file mode 100644
index db5b7112e9c..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_flushes_basic.test
+++ /dev/null
@@ -1,6 +0,0 @@
---source include/have_rocksdb.inc
-
---let $sys_var=ROCKSDB_MAX_BACKGROUND_FLUSHES
---let $read_only=1
---let $session=0
---source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_compactions_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test
index 6f0909a24c1..375a4fddb93 100644
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_compactions_basic.test
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test
@@ -7,7 +7,7 @@ INSERT INTO valid_values VALUES(64);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'abc\'');
---let $sys_var=ROCKSDB_MAX_BACKGROUND_COMPACTIONS
+--let $sys_var=ROCKSDB_MAX_BACKGROUND_JOBS
--let $read_only=0
--let $session=0
--source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
new file mode 100644
index 00000000000..9917ec31d9b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test
@@ -0,0 +1,17 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'123\'');
+
+--let $sys_var=ROCKSDB_MAX_LATEST_DEADLOCKS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
new file mode 100644
index 00000000000..03cc0b11d8c
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test
@@ -0,0 +1,49 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('on');
+
+--let $sys_var=ROCKSDB_MERGE_TMP_FILE_REMOVAL_DELAY_MS
+--let $read_only=0
+--let $session=1
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
+set session rocksdb_merge_buf_size=250;
+set session rocksdb_merge_combine_read_size=1000;
+set session rocksdb_merge_tmp_file_removal_delay_ms=1000;
+
+CREATE TABLE t1 (i INT, j INT, PRIMARY KEY (i)) ENGINE = ROCKSDB;
+
+--disable_query_log
+let $max = 100;
+let $i = 1;
+while ($i <= $max) {
+ let $insert = INSERT INTO t1 VALUES ($i, FLOOR(RAND() * 100));
+ inc $i;
+ eval $insert;
+}
+--enable_query_log
+
+let $start= `SELECT UNIX_TIMESTAMP()`;
+# this should take a lot longer than normal because each deleted merge file
+# will sleep for 1 secs. There should be about 13 buffers.
+# So it should take at least 13 secs
+ALTER TABLE t1 ADD INDEX kj(j), ALGORITHM=INPLACE;
+let $stop_exec= `SELECT UNIX_TIMESTAMP()`;
+
+let $time_diff= `SELECT ($stop_exec - $start)`;
+let $assert_text= Alter should have taken at least 10 seconds;
+let $assert_cond= $time_diff >= 10;
+source include/assert.inc;
+
+DROP TABLE t1;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test
new file mode 100644
index 00000000000..62f75a3bcc5
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_RESET_STATS
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
new file mode 100644
index 00000000000..a82d50e1d03
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_SIM_CACHE_SIZE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test
new file mode 100644
index 00000000000..3492596d74b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test
@@ -0,0 +1,22 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(100);
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+INSERT INTO invalid_values VALUES('\'-1\'');
+INSERT INTO invalid_values VALUES('\'101\'');
+INSERT INTO invalid_values VALUES('\'484436\'');
+
+--let $sys_var=ROCKSDB_SST_MGR_RATE_BYTES_PER_SEC
+--let $read_only=0
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
+
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
new file mode 100644
index 00000000000..15d5d870ae6
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test
@@ -0,0 +1,94 @@
+--source include/have_rocksdb.inc
+
+call mtr.add_suppression("MyRocks: NULL is not a valid option for updates to column family settings.");
+call mtr.add_suppression("Invalid cf options, '=' expected *");
+call mtr.add_suppression("MyRocks: failed to parse the updated column family options = *");
+call mtr.add_suppression("Invalid cf config for default in override options *");
+
+DROP TABLE IF EXISTS t1;
+
+# Need a table which has multiple partitions and column families associated
+# with them to make sure that we're testing the valid scenario.
+CREATE TABLE `t1` (
+ `col1` bigint(20) NOT NULL,
+ `col2` varbinary(64) NOT NULL,
+ `col3` varbinary(256) NOT NULL,
+ `col4` bigint(20) NOT NULL,
+ `col5` mediumblob NOT NULL,
+ PRIMARY KEY (`col1`,`col2`,`col3`) COMMENT 'custom_p0_cfname=cf1;custom_p1_cfname=cf2',
+ UNIQUE KEY (`col2`, `col4`) COMMENT 'custom_p5_cfname=cf3'
+) ENGINE=ROCKSDB DEFAULT CHARSET=latin1
+ PARTITION BY LIST COLUMNS (`col2`) (
+ PARTITION custom_p0 VALUES IN (0x12345),
+ PARTITION custom_p1 VALUES IN (0x23456),
+ PARTITION custom_p2 VALUES IN (0x34567),
+ PARTITION custom_p3 VALUES IN (0x45678),
+ PARTITION custom_p4 VALUES IN (0x56789),
+ PARTITION custom_p5 VALUES IN (0x6789A),
+ PARTITION custom_p6 VALUES IN (0x789AB),
+ PARTITION custom_p7 VALUES IN (0x89ABC)
+);
+
+USE information_schema;
+
+# We should start with NULL.
+SELECT @@global.rocksdb_update_cf_options;
+
+# ... and we should be able to handle NULL and issue a reasonable warning.
+SET @@global.rocksdb_update_cf_options = NULL;
+SELECT @@global.rocksdb_update_cf_options;
+
+# Will fail to parse. Value not updated.
+SET @@global.rocksdb_update_cf_options = 'aaaaa';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# All good. Use default CF.
+SET @@global.rocksdb_update_cf_options = 'default={write_buffer_size=8m;target_file_size_base=2m};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='default' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# All good. Use multiple valid CF-s.
+SET @@global.rocksdb_update_cf_options = 'cf1={write_buffer_size=8m;target_file_size_base=2m};cf2={write_buffer_size=16m;max_bytes_for_level_multiplier=8};cf3={target_file_size_base=4m};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='WRITE_BUFFER_SIZE';
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf2' AND OPTION_TYPE='MAX_BYTES_FOR_LEVEL_MULTIPLIER';
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# All good. Use a single valid CF.
+SET @@global.rocksdb_update_cf_options = 'cf3={target_file_size_base=24m};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf3' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# Some parts are good. Value still updated.
+SET @@global.rocksdb_update_cf_options = 'cf1={target_file_size_base=24m};foo={max_bytes_for_level_multiplier=8};';
+SELECT @@global.rocksdb_update_cf_options;
+
+SELECT * FROM ROCKSDB_CF_OPTIONS WHERE CF_NAME='cf1' AND OPTION_TYPE='TARGET_FILE_SIZE_BASE';
+
+# Will fail to parse. No valid assignments included. Value not updated and
+# reset to NULL.
+SET @@global.rocksdb_update_cf_options = 'default={foo=bar};';
+SELECT @@global.rocksdb_update_cf_options;
+
+USE test;
+
+DROP TABLE t1; \ No newline at end of file
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
new file mode 100644
index 00000000000..d3e7b652039
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(0);
+INSERT INTO valid_values VALUES('on');
+INSERT INTO valid_values VALUES('off');
+INSERT INTO valid_values VALUES('true');
+INSERT INTO valid_values VALUES('false');
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+INSERT INTO invalid_values VALUES('\'bbb\'');
+
+--let $sys_var=ROCKSDB_USE_CLOCK_CACHE
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test
new file mode 100644
index 00000000000..f5dde2aa0a3
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test
@@ -0,0 +1,6 @@
+--source include/have_rocksdb.inc
+
+--let $sys_var=ROCKSDB_USE_DIRECT_IO_FOR_FLUSH_AND_COMPACTION
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_writes_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_writes_basic.test
deleted file mode 100644
index 14e6de24652..00000000000
--- a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_writes_basic.test
+++ /dev/null
@@ -1,6 +0,0 @@
---source include/have_rocksdb.inc
-
---let $sys_var=ROCKSDB_USE_DIRECT_WRITES
---let $read_only=1
---let $session=0
---source include/rocksdb_sys_var.inc
diff --git a/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test
new file mode 100644
index 00000000000..295d8e1594b
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test
@@ -0,0 +1,26 @@
+--source include/have_rocksdb.inc
+
+create table t (i int);
+
+insert into t values (1), (2), (3), (4), (5);
+
+set session rocksdb_write_batch_max_bytes = 1000;
+
+insert into t values (1), (2), (3), (4), (5);
+
+set session rocksdb_write_batch_max_bytes = 10;
+
+--error ER_RDB_STATUS_GENERAL
+insert into t values (1), (2), (3), (4), (5);
+
+set session rocksdb_write_batch_max_bytes = 0;
+
+insert into t values (1), (2), (3), (4), (5);
+
+set session rocksdb_write_batch_max_bytes = 10;
+begin;
+--error ER_RDB_STATUS_GENERAL
+insert into t values (1), (2), (3), (4), (5);
+rollback;
+
+drop table t;
diff --git a/storage/rocksdb/rdb_buff.h b/storage/rocksdb/rdb_buff.h
index 4b5850137f6..c9647707232 100644
--- a/storage/rocksdb/rdb_buff.h
+++ b/storage/rocksdb/rdb_buff.h
@@ -300,6 +300,16 @@ public:
}
}
+ bool read_uint64(uint64 *const res) {
+ const uchar *p;
+ if (!(p = reinterpret_cast<const uchar *>(read(sizeof(uint64))))) {
+ return true; // error
+ } else {
+ *res = rdb_netbuf_to_uint64(p);
+ return false; // Ok
+ }
+ }
+
uint remaining_bytes() const { return m_len; }
/*
@@ -369,6 +379,22 @@ public:
DBUG_ASSERT(pos < get_current_pos() && (pos + 1) < get_current_pos());
rdb_netbuf_store_uint16(m_data.data() + pos, new_val);
}
+
+ void truncate(const size_t &pos) {
+ DBUG_ASSERT(pos < m_data.size());
+ m_data.resize(pos);
+ }
+
+ void allocate(const size_t &len, const uchar &val = 0) {
+ DBUG_ASSERT(len > 0);
+ m_data.resize(m_data.size() + len, val);
+ }
+
+ /*
+ An awful hack to deallocate the buffer without relying on the deconstructor.
+ This is needed to suppress valgrind errors in rocksdb.partition
+ */
+ void free() { std::vector<uchar>().swap(m_data); }
};
/*
diff --git a/storage/rocksdb/rdb_cf_manager.cc b/storage/rocksdb/rdb_cf_manager.cc
index f8a792d5c3a..9e2594ee246 100644
--- a/storage/rocksdb/rdb_cf_manager.cc
+++ b/storage/rocksdb/rdb_cf_manager.cc
@@ -43,7 +43,7 @@ bool Rdb_cf_manager::is_cf_name_reverse(const char *const name) {
}
void Rdb_cf_manager::init(
- Rdb_cf_options *const cf_options,
+ std::unique_ptr<Rdb_cf_options> cf_options,
std::vector<rocksdb::ColumnFamilyHandle *> *const handles) {
mysql_mutex_init(rdb_cfm_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST);
@@ -51,7 +51,7 @@ void Rdb_cf_manager::init(
DBUG_ASSERT(handles != nullptr);
DBUG_ASSERT(handles->size() > 0);
- m_cf_options = cf_options;
+ m_cf_options = std::move(cf_options);
for (auto cfh : *handles) {
DBUG_ASSERT(cfh != nullptr);
@@ -65,21 +65,7 @@ void Rdb_cf_manager::cleanup() {
delete it.second;
}
mysql_mutex_destroy(&m_mutex);
-}
-
-/**
- Generate Column Family name for per-index column families
-
- @param res OUT Column Family name
-*/
-
-void Rdb_cf_manager::get_per_index_cf_name(const std::string &db_table_name,
- const char *const index_name,
- std::string *const res) {
- DBUG_ASSERT(index_name != nullptr);
- DBUG_ASSERT(res != nullptr);
-
- *res = db_table_name + "." + index_name;
+ m_cf_options = nullptr;
}
/*
@@ -90,32 +76,22 @@ void Rdb_cf_manager::get_per_index_cf_name(const std::string &db_table_name,
See Rdb_cf_manager::get_cf
*/
rocksdb::ColumnFamilyHandle *
-Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb, const char *cf_name,
- const std::string &db_table_name,
- const char *const index_name,
- bool *const is_automatic) {
+Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb,
+ const std::string &cf_name_arg) {
DBUG_ASSERT(rdb != nullptr);
- DBUG_ASSERT(is_automatic != nullptr);
rocksdb::ColumnFamilyHandle *cf_handle = nullptr;
- RDB_MUTEX_LOCK_CHECK(m_mutex);
-
- *is_automatic = false;
-
- if (cf_name == nullptr || *cf_name == '\0') {
- cf_name = DEFAULT_CF_NAME;
+ if (cf_name_arg == PER_INDEX_CF_NAME) {
+ // per-index column families is no longer supported.
+ my_error(ER_PER_INDEX_CF_DEPRECATED, MYF(0));
+ return nullptr;
}
- DBUG_ASSERT(cf_name != nullptr);
+ const std::string &cf_name =
+ cf_name_arg.empty() ? DEFAULT_CF_NAME : cf_name_arg;
- std::string per_index_name;
-
- if (!strcmp(cf_name, PER_INDEX_CF_NAME)) {
- get_per_index_cf_name(db_table_name, index_name, &per_index_name);
- cf_name = per_index_name.c_str();
- *is_automatic = true;
- }
+ RDB_MUTEX_LOCK_CHECK(m_mutex);
const auto it = m_cf_name_map.find(cf_name);
@@ -123,19 +99,18 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb, const char *cf_name,
cf_handle = it->second;
} else {
/* Create a Column Family. */
- const std::string cf_name_str(cf_name);
rocksdb::ColumnFamilyOptions opts;
- m_cf_options->get_cf_options(cf_name_str, &opts);
+ m_cf_options->get_cf_options(cf_name, &opts);
// NO_LINT_DEBUG
sql_print_information("RocksDB: creating a column family %s",
- cf_name_str.c_str());
+ cf_name.c_str());
sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size);
sql_print_information(" target_file_size_base=%" PRIu64,
opts.target_file_size_base);
const rocksdb::Status s =
- rdb->CreateColumnFamily(opts, cf_name_str, &cf_handle);
+ rdb->CreateColumnFamily(opts, cf_name, &cf_handle);
if (s.ok()) {
m_cf_name_map[cf_handle->GetName()] = cf_handle;
@@ -152,47 +127,22 @@ Rdb_cf_manager::get_or_create_cf(rocksdb::DB *const rdb, const char *cf_name,
/*
Find column family by its cf_name.
-
- @detail
- dbname.tablename and index_name are also parameters, because
- cf_name=PER_INDEX_CF_NAME means that column family name is a function
- of table/index name.
-
- @param out is_automatic TRUE<=> column family name is auto-assigned based on
- db_table_name and index_name.
*/
rocksdb::ColumnFamilyHandle *
-Rdb_cf_manager::get_cf(const char *cf_name, const std::string &db_table_name,
- const char *const index_name,
- bool *const is_automatic) const {
- DBUG_ASSERT(is_automatic != nullptr);
-
+Rdb_cf_manager::get_cf(const std::string &cf_name_arg) const {
rocksdb::ColumnFamilyHandle *cf_handle;
- *is_automatic = false;
-
RDB_MUTEX_LOCK_CHECK(m_mutex);
- if (cf_name == nullptr) {
- cf_name = DEFAULT_CF_NAME;
- }
-
- std::string per_index_name;
-
- if (!strcmp(cf_name, PER_INDEX_CF_NAME)) {
- get_per_index_cf_name(db_table_name, index_name, &per_index_name);
- DBUG_ASSERT(!per_index_name.empty());
- cf_name = per_index_name.c_str();
- *is_automatic = true;
- }
+ std::string cf_name = cf_name_arg.empty() ? DEFAULT_CF_NAME : cf_name_arg;
const auto it = m_cf_name_map.find(cf_name);
cf_handle = (it != m_cf_name_map.end()) ? it->second : nullptr;
if (!cf_handle) {
// NO_LINT_DEBUG
- sql_print_warning("Column family '%s' not found.", cf_name);
+ sql_print_warning("Column family '%s' not found.", cf_name.c_str());
}
RDB_MUTEX_UNLOCK_CHECK(m_mutex);
@@ -231,6 +181,7 @@ Rdb_cf_manager::get_all_cf(void) const {
RDB_MUTEX_LOCK_CHECK(m_mutex);
for (auto it : m_cf_id_map) {
+ DBUG_ASSERT(it.second != nullptr);
list.push_back(it.second);
}
diff --git a/storage/rocksdb/rdb_cf_manager.h b/storage/rocksdb/rdb_cf_manager.h
index 7b9654f3537..3f27747dce1 100644
--- a/storage/rocksdb/rdb_cf_manager.h
+++ b/storage/rocksdb/rdb_cf_manager.h
@@ -52,13 +52,9 @@ class Rdb_cf_manager {
mutable mysql_mutex_t m_mutex;
- static void get_per_index_cf_name(const std::string &db_table_name,
- const char *const index_name,
- std::string *const res);
+ std::unique_ptr<Rdb_cf_options> m_cf_options = nullptr;
- Rdb_cf_options *m_cf_options = nullptr;
-
-public:
+ public:
Rdb_cf_manager(const Rdb_cf_manager &) = delete;
Rdb_cf_manager &operator=(const Rdb_cf_manager &) = delete;
Rdb_cf_manager() = default;
@@ -70,25 +66,19 @@ public:
column
families that are present in the database. The first CF is the default CF.
*/
- void init(Rdb_cf_options *cf_options,
+ void init(std::unique_ptr<Rdb_cf_options> cf_options,
std::vector<rocksdb::ColumnFamilyHandle *> *const handles);
void cleanup();
/*
Used by CREATE TABLE.
- cf_name=nullptr means use default column family
- - cf_name=_auto_ means use 'dbname.tablename.indexname'
*/
- rocksdb::ColumnFamilyHandle *
- get_or_create_cf(rocksdb::DB *const rdb, const char *cf_name,
- const std::string &db_table_name,
- const char *const index_name, bool *const is_automatic);
+ rocksdb::ColumnFamilyHandle *get_or_create_cf(rocksdb::DB *const rdb,
+ const std::string &cf_name);
/* Used by table open */
- rocksdb::ColumnFamilyHandle *get_cf(const char *cf_name,
- const std::string &db_table_name,
- const char *const index_name,
- bool *const is_automatic) const;
+ rocksdb::ColumnFamilyHandle *get_cf(const std::string &cf_name) const;
/* Look up cf by id; used by datadic */
rocksdb::ColumnFamilyHandle *get_cf(const uint32_t &id) const;
@@ -106,6 +96,11 @@ public:
MY_ATTRIBUTE((__nonnull__)) {
m_cf_options->get_cf_options(cf_name, opts);
}
+
+ void update_options_map(const std::string &cf_name,
+ const std::string &updated_options) {
+ m_cf_options->update(cf_name, updated_options);
+ }
};
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_cf_options.cc b/storage/rocksdb/rdb_cf_options.cc
index 1bf727dfb9c..e608580c666 100644
--- a/storage/rocksdb/rdb_cf_options.cc
+++ b/storage/rocksdb/rdb_cf_options.cc
@@ -74,16 +74,28 @@ void Rdb_cf_options::get(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts) {
DBUG_ASSERT(opts != nullptr);
- // set defaults
+ // Get defaults.
rocksdb::GetColumnFamilyOptionsFromString(*opts, m_default_config, opts);
- // set per-cf config if we have one
+ // Get a custom confguration if we have one.
Name_to_config_t::iterator it = m_name_map.find(cf_name);
+
if (it != m_name_map.end()) {
rocksdb::GetColumnFamilyOptionsFromString(*opts, it->second, opts);
}
}
+void Rdb_cf_options::update(const std::string &cf_name,
+ const std::string &cf_options) {
+ DBUG_ASSERT(!cf_name.empty());
+ DBUG_ASSERT(!cf_options.empty());
+
+ // Always update. If we didn't have an entry before then add it.
+ m_name_map[cf_name] = cf_options;
+
+ DBUG_ASSERT(!m_name_map.empty());
+}
+
bool Rdb_cf_options::set_default(const std::string &default_config) {
rocksdb::ColumnFamilyOptions options;
@@ -247,27 +259,30 @@ bool Rdb_cf_options::find_cf_options_pair(const std::string &input,
return true;
}
-bool Rdb_cf_options::set_override(const std::string &override_config) {
- // TODO(???): support updates?
-
+bool Rdb_cf_options::parse_cf_options(const std::string &cf_options,
+ Name_to_config_t *option_map) {
std::string cf;
std::string opt_str;
rocksdb::ColumnFamilyOptions options;
- Name_to_config_t configs;
+
+ DBUG_ASSERT(option_map != nullptr);
+ DBUG_ASSERT(option_map->empty());
// Loop through the characters of the string until we reach the end.
size_t pos = 0;
- while (pos < override_config.size()) {
+
+ while (pos < cf_options.size()) {
// Attempt to find <cf>={<opt_str>}.
- if (!find_cf_options_pair(override_config, &pos, &cf, &opt_str))
+ if (!find_cf_options_pair(cf_options, &pos, &cf, &opt_str)) {
return false;
+ }
// Generate an error if we have already seen this column family.
- if (configs.find(cf) != configs.end()) {
+ if (option_map->find(cf) != option_map->end()) {
// NO_LINT_DEBUG
sql_print_warning(
"Duplicate entry for %s in override options (options: %s)",
- cf.c_str(), override_config.c_str());
+ cf.c_str(), cf_options.c_str());
return false;
}
@@ -277,12 +292,22 @@ bool Rdb_cf_options::set_override(const std::string &override_config) {
// NO_LINT_DEBUG
sql_print_warning(
"Invalid cf config for %s in override options (options: %s)",
- cf.c_str(), override_config.c_str());
+ cf.c_str(), cf_options.c_str());
return false;
}
// If everything is good, add this cf/opt_str pair to the map.
- configs[cf] = opt_str;
+ (*option_map)[cf] = opt_str;
+ }
+
+ return true;
+}
+
+bool Rdb_cf_options::set_override(const std::string &override_config) {
+ Name_to_config_t configs;
+
+ if (!parse_cf_options(override_config, &configs)) {
+ return false;
}
// Everything checked out - make the map live
diff --git a/storage/rocksdb/rdb_cf_options.h b/storage/rocksdb/rdb_cf_options.h
index 1cd80a131ad..32f2308284f 100644
--- a/storage/rocksdb/rdb_cf_options.h
+++ b/storage/rocksdb/rdb_cf_options.h
@@ -40,6 +40,8 @@ namespace myrocks {
*/
class Rdb_cf_options {
public:
+ using Name_to_config_t = std::unordered_map<std::string, std::string>;
+
Rdb_cf_options(const Rdb_cf_options &) = delete;
Rdb_cf_options &operator=(const Rdb_cf_options &) = delete;
Rdb_cf_options() = default;
@@ -47,6 +49,8 @@ public:
void get(const std::string &cf_name,
rocksdb::ColumnFamilyOptions *const opts);
+ void update(const std::string &cf_name, const std::string &cf_options);
+
bool init(const rocksdb::BlockBasedTableOptions &table_options,
std::shared_ptr<rocksdb::TablePropertiesCollectorFactory>
prop_coll_factory,
@@ -64,6 +68,9 @@ public:
rocksdb::ColumnFamilyOptions *const opts)
MY_ATTRIBUTE((__nonnull__));
+ static bool parse_cf_options(const std::string &cf_options,
+ Name_to_config_t *option_map);
+
private:
bool set_default(const std::string &default_config);
bool set_override(const std::string &overide_config);
@@ -82,8 +89,6 @@ private:
static Rdb_pk_comparator s_pk_comparator;
static Rdb_rev_comparator s_rev_pk_comparator;
- typedef std::unordered_map<std::string, std::string> Name_to_config_t;
-
/* CF name -> value map */
Name_to_config_t m_name_map;
diff --git a/storage/rocksdb/rdb_compact_filter.h b/storage/rocksdb/rdb_compact_filter.h
index 9e0d69597ff..c457fba4af2 100644
--- a/storage/rocksdb/rdb_compact_filter.h
+++ b/storage/rocksdb/rdb_compact_filter.h
@@ -22,6 +22,7 @@
/* C++ system header files */
#include <string>
+#include <time.h>
/* RocksDB includes */
#include "rocksdb/compaction_filter.h"
@@ -38,7 +39,10 @@ public:
Rdb_compact_filter &operator=(const Rdb_compact_filter &) = delete;
explicit Rdb_compact_filter(uint32_t _cf_id) : m_cf_id(_cf_id) {}
- ~Rdb_compact_filter() {}
+ ~Rdb_compact_filter() {
+ // Increment stats by num expired at the end of compaction
+ rdb_update_global_stats(ROWS_EXPIRED, m_num_expired);
+ }
// keys are passed in sorted order within the same sst.
// V1 Filter is thread safe on our usage (creating from Factory).
@@ -55,36 +59,141 @@ public:
gl_index_id.index_id = rdb_netbuf_to_uint32((const uchar *)key.data());
DBUG_ASSERT(gl_index_id.index_id >= 1);
- if (gl_index_id != m_prev_index) // processing new index id
- {
- if (m_num_deleted > 0) {
- m_num_deleted = 0;
- }
+ if (gl_index_id != m_prev_index) {
m_should_delete =
rdb_get_dict_manager()->is_drop_index_ongoing(gl_index_id);
+
+ if (!m_should_delete) {
+ get_ttl_duration_and_offset(gl_index_id, &m_ttl_duration,
+ &m_ttl_offset);
+
+ if (m_ttl_duration != 0 && m_snapshot_timestamp == 0) {
+ /*
+ For efficiency reasons, we lazily call GetIntProperty to get the
+ oldest snapshot time (occurs once per compaction).
+ */
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+ if (!rdb->GetIntProperty(rocksdb::DB::Properties::kOldestSnapshotTime,
+ &m_snapshot_timestamp) ||
+ m_snapshot_timestamp == 0) {
+ m_snapshot_timestamp = static_cast<uint64_t>(std::time(nullptr));
+ }
+
+#ifndef NDEBUG
+ int snapshot_ts = rdb_dbug_set_ttl_snapshot_ts();
+ if (snapshot_ts) {
+ m_snapshot_timestamp =
+ static_cast<uint64_t>(std::time(nullptr)) + snapshot_ts;
+ }
+#endif
+ }
+ }
+
m_prev_index = gl_index_id;
}
if (m_should_delete) {
m_num_deleted++;
+ return true;
+ } else if (m_ttl_duration > 0 &&
+ should_filter_ttl_rec(key, existing_value)) {
+ m_num_expired++;
+ return true;
}
- return m_should_delete;
+ return false;
}
virtual bool IgnoreSnapshots() const override { return true; }
virtual const char *Name() const override { return "Rdb_compact_filter"; }
-private:
+ void get_ttl_duration_and_offset(const GL_INDEX_ID &gl_index_id,
+ uint64 *ttl_duration,
+ uint32 *ttl_offset) const {
+ DBUG_ASSERT(ttl_duration != nullptr);
+ /*
+ If TTL is disabled set ttl_duration to 0. This prevents the compaction
+ filter from dropping expired records.
+ */
+ if (!rdb_is_ttl_enabled()) {
+ *ttl_duration = 0;
+ return;
+ }
+
+ /*
+ If key is part of system column family, it's definitely not a TTL key.
+ */
+ rocksdb::ColumnFamilyHandle *s_cf = rdb_get_dict_manager()->get_system_cf();
+ if (s_cf == nullptr || gl_index_id.cf_id == s_cf->GetID()) {
+ *ttl_duration = 0;
+ return;
+ }
+
+ struct Rdb_index_info index_info;
+ if (!rdb_get_dict_manager()->get_index_info(gl_index_id, &index_info)) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Could not get index information "
+ "for Index Number (%u,%u)",
+ gl_index_id.cf_id, gl_index_id.index_id);
+ }
+
+#ifndef NDEBUG
+ if (rdb_dbug_set_ttl_ignore_pk() &&
+ index_info.m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY) {
+ *ttl_duration = 0;
+ return;
+ }
+#endif
+
+ *ttl_duration = index_info.m_ttl_duration;
+ if (Rdb_key_def::has_index_flag(index_info.m_index_flags,
+ Rdb_key_def::TTL_FLAG)) {
+ *ttl_offset = Rdb_key_def::calculate_index_flag_offset(
+ index_info.m_index_flags, Rdb_key_def::TTL_FLAG);
+ }
+ }
+
+ bool should_filter_ttl_rec(const rocksdb::Slice &key,
+ const rocksdb::Slice &existing_value) const {
+ uint64 ttl_timestamp;
+ Rdb_string_reader reader(&existing_value);
+ if (!reader.read(m_ttl_offset) || reader.read_uint64(&ttl_timestamp)) {
+ std::string buf;
+ buf = rdb_hexdump(existing_value.data(), existing_value.size(),
+ RDB_MAX_HEXDUMP_LEN);
+ // NO_LINT_DEBUG
+ sql_print_error("Decoding ttl from PK value failed in compaction filter, "
+ "for index (%u,%u), val: %s",
+ m_prev_index.cf_id, m_prev_index.index_id, buf.c_str());
+ abort_with_stack_traces();
+ }
+
+ /*
+ Filter out the record only if it is older than the oldest snapshot
+ timestamp. This prevents any rows from expiring in the middle of
+ long-running transactions.
+ */
+ return ttl_timestamp + m_ttl_duration <= m_snapshot_timestamp;
+ }
+
+ private:
// Column family for this compaction filter
const uint32_t m_cf_id;
// Index id of the previous record
mutable GL_INDEX_ID m_prev_index = {0, 0};
// Number of rows deleted for the same index id
mutable uint64 m_num_deleted = 0;
+ // Number of rows expired for the TTL index
+ mutable uint64 m_num_expired = 0;
// Current index id should be deleted or not (should be deleted if true)
mutable bool m_should_delete = false;
+ // TTL duration for the current index if TTL is enabled
+ mutable uint64 m_ttl_duration = 0;
+ // TTL offset for all records in the current index
+ mutable uint32 m_ttl_offset = 0;
+ // Oldest snapshot timestamp at the time a TTL index is discovered
+ mutable uint64_t m_snapshot_timestamp = 0;
};
class Rdb_compact_filter_factory : public rocksdb::CompactionFilterFactory {
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index da4eead99b8..1376d5c8424 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -36,9 +36,12 @@
#include <vector>
/* MySQL header files */
+#include "./field.h"
#include "./key.h"
#include "./m_ctype.h"
#include "./my_bit.h"
+#include "./my_bitmap.h"
+#include "./sql_table.h"
/* MyRocks header files */
#include "./ha_rocksdb_proto.h"
@@ -53,6 +56,16 @@ void get_mem_comparable_space(const CHARSET_INFO *cs,
size_t *mb_len);
/*
+ MariaDB's replacement for FB/MySQL Field::check_field_name_match :
+*/
+inline bool field_check_field_name_match(Field *field, const char *name)
+{
+ return (0 == my_strcasecmp(system_charset_info,
+ field->field_name,
+ name));
+}
+
+/*
Rdb_key_def class implementation
*/
@@ -60,32 +73,56 @@ Rdb_key_def::Rdb_key_def(uint indexnr_arg, uint keyno_arg,
rocksdb::ColumnFamilyHandle *cf_handle_arg,
uint16_t index_dict_version_arg, uchar index_type_arg,
uint16_t kv_format_version_arg, bool is_reverse_cf_arg,
- bool is_auto_cf_arg, bool is_per_partition_cf_arg,
- const char *_name, Rdb_index_stats _stats)
+ bool is_per_partition_cf_arg, const char *_name,
+ Rdb_index_stats _stats, uint32 index_flags_bitmap,
+ uint32 ttl_rec_offset, uint64 ttl_duration)
: m_index_number(indexnr_arg), m_cf_handle(cf_handle_arg),
m_index_dict_version(index_dict_version_arg),
m_index_type(index_type_arg), m_kv_format_version(kv_format_version_arg),
- m_is_reverse_cf(is_reverse_cf_arg), m_is_auto_cf(is_auto_cf_arg),
- m_is_per_partition_cf(is_per_partition_cf_arg),
- m_name(_name), m_stats(_stats), m_pk_part_no(nullptr),
- m_pack_info(nullptr), m_keyno(keyno_arg), m_key_parts(0),
- m_prefix_extractor(nullptr), m_maxlength(0) // means 'not intialized'
+ m_is_reverse_cf(is_reverse_cf_arg),
+ m_is_per_partition_cf(is_per_partition_cf_arg), m_name(_name),
+ m_stats(_stats), m_index_flags_bitmap(index_flags_bitmap),
+ m_ttl_rec_offset(ttl_rec_offset), m_ttl_duration(ttl_duration),
+ m_ttl_column(""), m_pk_part_no(nullptr), m_pack_info(nullptr),
+ m_keyno(keyno_arg), m_key_parts(0), m_ttl_pk_key_part_offset(UINT_MAX),
+ m_ttl_field_offset(UINT_MAX), m_prefix_extractor(nullptr),
+ m_maxlength(0) // means 'not intialized'
{
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
DBUG_ASSERT(m_cf_handle != nullptr);
}
Rdb_key_def::Rdb_key_def(const Rdb_key_def &k)
: m_index_number(k.m_index_number), m_cf_handle(k.m_cf_handle),
- m_is_reverse_cf(k.m_is_reverse_cf), m_is_auto_cf(k.m_is_auto_cf),
- m_is_per_partition_cf(k.m_is_per_partition_cf),
- m_name(k.m_name), m_stats(k.m_stats), m_pk_part_no(k.m_pk_part_no),
+ m_is_reverse_cf(k.m_is_reverse_cf),
+ m_is_per_partition_cf(k.m_is_per_partition_cf), m_name(k.m_name),
+ m_stats(k.m_stats), m_index_flags_bitmap(k.m_index_flags_bitmap),
+ m_ttl_rec_offset(k.m_ttl_rec_offset), m_ttl_duration(k.m_ttl_duration),
+ m_ttl_column(k.m_ttl_column), m_pk_part_no(k.m_pk_part_no),
m_pack_info(k.m_pack_info), m_keyno(k.m_keyno),
- m_key_parts(k.m_key_parts), m_prefix_extractor(k.m_prefix_extractor),
+ m_key_parts(k.m_key_parts),
+ m_ttl_pk_key_part_offset(k.m_ttl_pk_key_part_offset),
+ m_ttl_field_offset(UINT_MAX), m_prefix_extractor(k.m_prefix_extractor),
m_maxlength(k.m_maxlength) {
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
rdb_netbuf_store_index(m_index_number_storage_form, m_index_number);
+ m_total_index_flags_length =
+ calculate_index_flag_offset(m_index_flags_bitmap, MAX_FLAG);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version <= SECONDARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
+ DBUG_ASSERT_IMP(m_index_type == INDEX_TYPE_PRIMARY &&
+ m_kv_format_version <= PRIMARY_FORMAT_VERSION_UPDATE2,
+ m_total_index_flags_length == 0);
if (k.m_pack_info) {
const size_t size = sizeof(Rdb_field_packing) * k.m_key_parts;
m_pack_info =
@@ -178,6 +215,13 @@ void Rdb_key_def::setup(const TABLE *const tbl,
m_pack_info =
reinterpret_cast<Rdb_field_packing *>(my_malloc(size, MYF(0)));
+ /*
+ Guaranteed not to error here as checks have been made already during
+ table creation.
+ */
+ Rdb_key_def::extract_ttl_col(tbl, tbl_def, &m_ttl_column,
+ &m_ttl_field_offset, true);
+
size_t max_len = INDEX_NUMBER_SIZE;
int unpack_len = 0;
int max_part_len = 0;
@@ -250,6 +294,18 @@ void Rdb_key_def::setup(const TABLE *const tbl,
max_part_len =
std::max(max_part_len, m_pack_info[dst_i].m_max_image_len);
+ /*
+ Check key part name here, if it matches the TTL column then we store
+ the offset of the TTL key part here.
+ */
+ if (!m_ttl_column.empty() &&
+ field_check_field_name_match(field, m_ttl_column.c_str())) {
+ DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
+ DBUG_ASSERT(field->key_type() == HA_KEYTYPE_ULONGLONG);
+ DBUG_ASSERT(!field->real_maybe_null());
+ m_ttl_pk_key_part_offset = dst_i;
+ }
+
key_part++;
/*
For "unique" secondary indexes, pretend they have
@@ -296,6 +352,245 @@ void Rdb_key_def::setup(const TABLE *const tbl,
}
}
+/*
+ Determine if the table has TTL enabled by parsing the table comment.
+
+ @param[IN] table_arg
+ @param[IN] tbl_def_arg
+ @param[OUT] ttl_duration Default TTL value parsed from table comment
+*/
+uint Rdb_key_def::extract_ttl_duration(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ uint64 *ttl_duration) {
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(tbl_def_arg != nullptr);
+ DBUG_ASSERT(ttl_duration != nullptr);
+ std::string table_comment(table_arg->s->comment.str,
+ table_arg->s->comment.length);
+
+ bool ttl_duration_per_part_match_found = false;
+ std::string ttl_duration_str = Rdb_key_def::parse_comment_for_qualifier(
+ table_comment, table_arg, tbl_def_arg, &ttl_duration_per_part_match_found,
+ RDB_TTL_DURATION_QUALIFIER);
+
+ /* If we don't have a ttl duration, nothing to do here. */
+ if (ttl_duration_str.empty()) {
+ return HA_EXIT_SUCCESS;
+ }
+
+ /*
+ Catch errors where a non-integral value was used as ttl duration, strtoull
+ will return 0.
+ */
+ *ttl_duration = std::strtoull(ttl_duration_str.c_str(), nullptr, 0);
+ if (!*ttl_duration) {
+ my_error(ER_RDB_TTL_DURATION_FORMAT, MYF(0), ttl_duration_str.c_str());
+ return HA_EXIT_FAILURE;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+/*
+ Determine if the table has TTL enabled by parsing the table comment.
+
+ @param[IN] table_arg
+ @param[IN] tbl_def_arg
+ @param[OUT] ttl_column TTL column in the table
+ @param[IN] skip_checks Skip validation checks (when called in
+ setup())
+*/
+uint Rdb_key_def::extract_ttl_col(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ std::string *ttl_column,
+ uint *ttl_field_offset, bool skip_checks) {
+ std::string table_comment(table_arg->s->comment.str,
+ table_arg->s->comment.length);
+ /*
+ Check if there is a TTL column specified. Note that this is not required
+ and if omitted, an 8-byte ttl field will be prepended to each record
+ implicitly.
+ */
+ bool ttl_col_per_part_match_found = false;
+ std::string ttl_col_str = Rdb_key_def::parse_comment_for_qualifier(
+ table_comment, table_arg, tbl_def_arg, &ttl_col_per_part_match_found,
+ RDB_TTL_COL_QUALIFIER);
+
+ if (skip_checks) {
+ for (uint i = 0; i < table_arg->s->fields; i++) {
+ Field *const field = table_arg->field[i];
+ if (field_check_field_name_match(field, ttl_col_str.c_str())) {
+ *ttl_column = ttl_col_str;
+ *ttl_field_offset = i;
+ }
+ }
+ return HA_EXIT_SUCCESS;
+ }
+
+ /* Check if TTL column exists in table */
+ if (!ttl_col_str.empty()) {
+ bool found = false;
+ for (uint i = 0; i < table_arg->s->fields; i++) {
+ Field *const field = table_arg->field[i];
+ if (field_check_field_name_match(field, ttl_col_str.c_str()) &&
+ field->real_type() == MYSQL_TYPE_LONGLONG &&
+ field->key_type() == HA_KEYTYPE_ULONGLONG &&
+ !field->real_maybe_null()) {
+ *ttl_column = ttl_col_str;
+ *ttl_field_offset = i;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_col_str.c_str());
+ return HA_EXIT_FAILURE;
+ }
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+const std::string
+Rdb_key_def::gen_qualifier_for_table(const char *const qualifier,
+ const std::string &partition_name) {
+ bool has_partition = !partition_name.empty();
+ std::string qualifier_str = "";
+
+ if (!strcmp(qualifier, RDB_CF_NAME_QUALIFIER)) {
+ return has_partition ? gen_cf_name_qualifier_for_partition(partition_name)
+ : qualifier_str + RDB_CF_NAME_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+ } else if (!strcmp(qualifier, RDB_TTL_DURATION_QUALIFIER)) {
+ return has_partition
+ ? gen_ttl_duration_qualifier_for_partition(partition_name)
+ : qualifier_str + RDB_TTL_DURATION_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+ } else if (!strcmp(qualifier, RDB_TTL_COL_QUALIFIER)) {
+ return has_partition ? gen_ttl_col_qualifier_for_partition(partition_name)
+ : qualifier_str + RDB_TTL_COL_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+ } else {
+ DBUG_ASSERT(0);
+ }
+
+ return qualifier_str;
+}
+
+/*
+ Formats the string and returns the column family name assignment part for a
+ specific partition.
+*/
+const std::string
+Rdb_key_def::gen_cf_name_qualifier_for_partition(const std::string &prefix) {
+ DBUG_ASSERT(!prefix.empty());
+
+ return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_CF_NAME_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+}
+
+const std::string Rdb_key_def::gen_ttl_duration_qualifier_for_partition(
+ const std::string &prefix) {
+ DBUG_ASSERT(!prefix.empty());
+
+ return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP +
+ RDB_TTL_DURATION_QUALIFIER + RDB_QUALIFIER_VALUE_SEP;
+}
+
+const std::string
+Rdb_key_def::gen_ttl_col_qualifier_for_partition(const std::string &prefix) {
+ DBUG_ASSERT(!prefix.empty());
+
+ return prefix + RDB_PER_PARTITION_QUALIFIER_NAME_SEP + RDB_TTL_COL_QUALIFIER +
+ RDB_QUALIFIER_VALUE_SEP;
+}
+
+const std::string Rdb_key_def::parse_comment_for_qualifier(
+ const std::string &comment, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found,
+ const char *const qualifier) {
+ DBUG_ASSERT(table_arg != nullptr);
+ DBUG_ASSERT(tbl_def_arg != nullptr);
+ DBUG_ASSERT(per_part_match_found != nullptr);
+ DBUG_ASSERT(qualifier != nullptr);
+
+ std::string empty_result;
+
+ // Flag which marks if partition specific options were found.
+ *per_part_match_found = false;
+
+ if (comment.empty()) {
+ return empty_result;
+ }
+
+ // Let's fetch the comment for a index and check if there's a custom key
+ // name specified for a partition we are handling.
+ std::vector<std::string> v =
+ myrocks::parse_into_tokens(comment, RDB_QUALIFIER_SEP);
+
+ std::string search_str = gen_qualifier_for_table(qualifier);
+
+ // If table has partitions then we need to check if user has requested
+ // qualifiers on a per partition basis.
+ //
+ // NOTE: this means if you specify a qualifier for a specific partition it
+ // will take precedence the 'table level' qualifier if one exists.
+ std::string search_str_part;
+ if (table_arg->part_info != nullptr) {
+ std::string partition_name = tbl_def_arg->base_partition();
+ DBUG_ASSERT(!partition_name.empty());
+ search_str_part = gen_qualifier_for_table(qualifier, partition_name);
+ }
+
+ DBUG_ASSERT(!search_str.empty());
+
+ // Basic O(N) search for a matching assignment. At most we expect maybe
+ // ten or so elements here.
+ if (!search_str_part.empty()) {
+ for (const auto &it : v) {
+ if (it.substr(0, search_str_part.length()) == search_str_part) {
+ // We found a prefix match. Try to parse it as an assignment.
+ std::vector<std::string> tokens =
+ myrocks::parse_into_tokens(it, RDB_QUALIFIER_VALUE_SEP);
+
+ // We found a custom qualifier, it was in the form we expected it to be.
+ // Return that instead of whatever we initially wanted to return. In
+ // a case below the `foo` part will be returned to the caller.
+ //
+ // p3_cfname=foo
+ //
+ // If no value was specified then we'll return an empty string which
+ // later gets translated into using a default CF.
+ if (tokens.size() == 2) {
+ *per_part_match_found = true;
+ return tokens[1];
+ } else {
+ return empty_result;
+ }
+ }
+ }
+ }
+
+ // Do this loop again, this time searching for 'table level' qualifiers if we
+ // didn't find any partition level qualifiers above.
+ for (const auto &it : v) {
+ if (it.substr(0, search_str.length()) == search_str) {
+ std::vector<std::string> tokens =
+ myrocks::parse_into_tokens(it, RDB_QUALIFIER_VALUE_SEP);
+ if (tokens.size() == 2) {
+ return tokens[1];
+ } else {
+ return empty_result;
+ }
+ }
+ }
+
+ // If we didn't find any partitioned/non-partitioned qualifiers, return an
+ // empty string.
+ return empty_result;
+}
+
/**
Read a memcmp key part from a slice using the passed in reader.
@@ -327,7 +622,7 @@ int Rdb_key_def::read_memcmp_key_part(const TABLE *table_arg,
Field *field = nullptr;
if (!is_hidden_pk_part)
field = fpi->get_field_in_table(table_arg);
- if (fpi->m_skip_func(fpi, field, reader))
+ if ((this->*fpi->m_skip_func)(fpi, field, reader))
return 1;
return 0;
@@ -493,11 +788,14 @@ uint Rdb_key_def::pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer,
*/
bool Rdb_key_def::unpack_info_has_checksum(const rocksdb::Slice &unpack_info) {
- const uchar *ptr = (const uchar *)unpack_info.data();
size_t size = unpack_info.size();
+ if (size == 0) {
+ return false;
+ }
+ const uchar *ptr = (const uchar *)unpack_info.data();
// Skip unpack info if present.
- if (size >= RDB_UNPACK_HEADER_SIZE && ptr[0] == RDB_UNPACK_DATA_TAG) {
+ if (is_unpack_data_tag(ptr[0]) && size >= get_unpack_header_size(ptr[0])) {
const uint16 skip_len = rdb_netbuf_to_uint16(ptr + 1);
SHIP_ASSERT(size >= skip_len);
@@ -527,15 +825,125 @@ int Rdb_key_def::successor(uchar *const packed_tuple, const uint &len) {
return changed;
}
-uchar *Rdb_key_def::pack_field(
- Field *const field,
- Rdb_field_packing *pack_info,
- uchar * tuple,
- uchar *const packed_tuple,
- uchar *const pack_buffer,
- Rdb_string_writer *const unpack_info,
- uint *const n_null_fields) const
-{
+static const std::map<char, size_t> UNPACK_HEADER_SIZES = {
+ {RDB_UNPACK_DATA_TAG, RDB_UNPACK_HEADER_SIZE},
+ {RDB_UNPACK_COVERED_DATA_TAG, RDB_UNPACK_COVERED_HEADER_SIZE}};
+
+/*
+ @return The length in bytes of the header specified by the given tag
+*/
+size_t Rdb_key_def::get_unpack_header_size(char tag) {
+ DBUG_ASSERT(is_unpack_data_tag(tag));
+ return UNPACK_HEADER_SIZES.at(tag);
+}
+
+/*
+ Get a bitmap indicating which varchar columns must be covered for this
+ lookup to be covered. If the bitmap is a subset of the covered bitmap, then
+ the lookup is covered. If it can already be determined that the lookup is
+ not covered, map->bitmap will be set to null.
+ */
+void Rdb_key_def::get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const {
+ DBUG_ASSERT(map->bitmap == nullptr);
+ bitmap_init(map, nullptr, MAX_REF_PARTS, false);
+ uint curr_bitmap_pos = 0;
+
+ // Indicates which columns in the read set might be covered.
+ MY_BITMAP maybe_covered_bitmap;
+ bitmap_init(&maybe_covered_bitmap, nullptr, table->read_set->n_bits, false);
+
+ for (uint i = 0; i < m_key_parts; i++) {
+ if (table_has_hidden_pk(table) && i + 1 == m_key_parts) {
+ continue;
+ }
+
+ Field *const field = m_pack_info[i].get_field_in_table(table);
+
+ // Columns which are always covered are not stored in the covered bitmap so
+ // we can ignore them here too.
+ if (m_pack_info[i].m_covered &&
+ bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ continue;
+ }
+
+ switch (field->real_type()) {
+ // This type may be covered depending on the record. If it was requested,
+ // we require the covered bitmap to have this bit set.
+ case MYSQL_TYPE_VARCHAR:
+ if (curr_bitmap_pos < MAX_REF_PARTS) {
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_set_bit(map, curr_bitmap_pos);
+ bitmap_set_bit(&maybe_covered_bitmap, field->field_index);
+ }
+ curr_bitmap_pos++;
+ } else {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ // This column is a type which is never covered. If it was requested, we
+ // know this lookup will never be covered.
+ default:
+ if (bitmap_is_set(table->read_set, field->field_index)) {
+ bitmap_free(&maybe_covered_bitmap);
+ bitmap_free(map);
+ return;
+ }
+ break;
+ }
+ }
+
+ // If there are columns which are not covered in the read set, the lookup
+ // can't be covered.
+ if (!bitmap_cmp(table->read_set, &maybe_covered_bitmap)) {
+ bitmap_free(map);
+ }
+ bitmap_free(&maybe_covered_bitmap);
+}
+
+/*
+ Return true if for this secondary index
+ - All of the requested columns are in the index
+ - All values for columns that are prefix-only indexes are shorter or equal
+ in length to the prefix
+ */
+bool Rdb_key_def::covers_lookup(TABLE *const table,
+ const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const lookup_bitmap) const {
+ DBUG_ASSERT(lookup_bitmap != nullptr);
+ if (!use_covered_bitmap_format() || lookup_bitmap->bitmap == nullptr) {
+ return false;
+ }
+
+ Rdb_string_reader unp_reader = Rdb_string_reader::read_or_empty(unpack_info);
+
+ // Check if this unpack_info has a covered_bitmap
+ const char *unpack_header = unp_reader.get_current_ptr();
+ const bool has_covered_unpack_info =
+ unp_reader.remaining_bytes() &&
+ unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG;
+ if (!has_covered_unpack_info ||
+ !unp_reader.read(RDB_UNPACK_COVERED_HEADER_SIZE)) {
+ return false;
+ }
+
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
+
+ return bitmap_is_subset(lookup_bitmap, &covered_bitmap);
+}
+
+uchar *Rdb_key_def::pack_field(Field *const field, Rdb_field_packing *pack_info,
+ uchar *tuple, uchar *const packed_tuple,
+ uchar *const pack_buffer,
+ Rdb_string_writer *const unpack_info,
+ uint *const n_null_fields) const {
if (field->real_maybe_null()) {
DBUG_ASSERT(is_storage_available(tuple - packed_tuple, 1));
if (field->is_real_null()) {
@@ -560,12 +968,13 @@ uchar *Rdb_key_def::pack_field(
DBUG_ASSERT(is_storage_available(tuple - packed_tuple,
pack_info->m_max_image_len));
- pack_info->m_pack_func(pack_info, field, pack_buffer, &tuple, &pack_ctx);
+ (this->*pack_info->m_pack_func)(pack_info, field, pack_buffer, &tuple,
+ &pack_ctx);
/* Make "unpack info" to be stored in the value */
if (create_unpack_info) {
- pack_info->m_make_unpack_info_func(pack_info->m_charset_codec, field,
- &pack_ctx);
+ (this->*pack_info->m_make_unpack_info_func)(pack_info->m_charset_codec,
+ field, &pack_ctx);
}
return tuple;
@@ -584,6 +993,7 @@ uchar *Rdb_key_def::pack_field(
unpack_info_len OUT Unpack data length
n_key_parts Number of keyparts to process. 0 means all of them.
n_null_fields OUT Number of key fields with NULL value.
+ ttl_pk_offset OUT Offset of the ttl column if specified and in the key
@detail
Some callers do not need the unpack information, they can pass
@@ -593,13 +1003,12 @@ uchar *Rdb_key_def::pack_field(
Length of the packed tuple
*/
-uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
- const uchar *const record,
- uchar *const packed_tuple,
- Rdb_string_writer *const unpack_info,
- const bool &should_store_row_debug_checksums,
- const longlong &hidden_pk_id, uint n_key_parts,
- uint *const n_null_fields) const {
+uint Rdb_key_def::pack_record(
+ const TABLE *const tbl, uchar *const pack_buffer, const uchar *const record,
+ uchar *const packed_tuple, Rdb_string_writer *const unpack_info,
+ const bool &should_store_row_debug_checksums, const longlong &hidden_pk_id,
+ uint n_key_parts, uint *const n_null_fields, uint *const ttl_pk_offset,
+ const char *const ttl_bytes) const {
DBUG_ASSERT(tbl != nullptr);
DBUG_ASSERT(pack_buffer != nullptr);
DBUG_ASSERT(record != nullptr);
@@ -610,7 +1019,9 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
(m_index_type == INDEX_TYPE_SECONDARY));
uchar *tuple = packed_tuple;
+ size_t unpack_start_pos = size_t(-1);
size_t unpack_len_pos = size_t(-1);
+ size_t covered_bitmap_pos = size_t(-1);
const bool hidden_pk_exists = table_has_hidden_pk(tbl);
rdb_netbuf_store_index(tuple, m_index_number);
@@ -632,14 +1043,57 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
if (n_null_fields)
*n_null_fields = 0;
+ // Check if we need a covered bitmap. If it is certain that all key parts are
+ // covering, we don't need one.
+ bool store_covered_bitmap = false;
+ if (unpack_info && use_covered_bitmap_format()) {
+ for (uint i = 0; i < n_key_parts; i++) {
+ if (!m_pack_info[i].m_covered) {
+ store_covered_bitmap = true;
+ break;
+ }
+ }
+ }
+
+ const char tag =
+ store_covered_bitmap ? RDB_UNPACK_COVERED_DATA_TAG : RDB_UNPACK_DATA_TAG;
+
if (unpack_info) {
unpack_info->clear();
- unpack_info->write_uint8(RDB_UNPACK_DATA_TAG);
+
+ if (m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0) {
+ // Reserve space for index flag fields
+ unpack_info->allocate(m_total_index_flags_length);
+
+ // Insert TTL timestamp
+ if (has_ttl() && ttl_bytes) {
+ write_index_flag_field(unpack_info,
+ reinterpret_cast<const uchar *const>(ttl_bytes),
+ Rdb_key_def::TTL_FLAG);
+ }
+ }
+
+ unpack_start_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint8(tag);
unpack_len_pos = unpack_info->get_current_pos();
// we don't know the total length yet, so write a zero
unpack_info->write_uint16(0);
+
+ if (store_covered_bitmap) {
+ // Reserve two bytes for the covered bitmap. This will store, for key
+ // parts which are not always covering, whether or not it is covering
+ // for this record.
+ covered_bitmap_pos = unpack_info->get_current_pos();
+ unpack_info->write_uint16(0);
+ }
}
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ uint curr_bitmap_pos = 0;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+
for (uint i = 0; i < n_key_parts; i++) {
// Fill hidden pk id into the last key part for secondary keys for tables
// with no pk
@@ -654,6 +1108,17 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
uint field_offset = field->ptr - tbl->record[0];
uint null_offset = field->null_offset(tbl->record[0]);
bool maybe_null = field->real_maybe_null();
+
+ // Save the ttl duration offset in the key so we can store it in front of
+ // the record later.
+ if (ttl_pk_offset && m_ttl_duration > 0 && i == m_ttl_pk_key_part_offset) {
+ DBUG_ASSERT(field_check_field_name_match(field, m_ttl_column.c_str()));
+ DBUG_ASSERT(field->real_type() == MYSQL_TYPE_LONGLONG);
+ DBUG_ASSERT(field->key_type() == HA_KEYTYPE_ULONGLONG);
+ DBUG_ASSERT(!field->real_maybe_null());
+ *ttl_pk_offset = tuple - packed_tuple;
+ }
+
field->move_field(const_cast<uchar*>(record) + field_offset,
maybe_null ? const_cast<uchar*>(record) + null_offset : nullptr,
field->null_bit);
@@ -662,6 +1127,25 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
tuple = pack_field(field, &m_pack_info[i], tuple, packed_tuple, pack_buffer,
unpack_info, n_null_fields);
+ // If this key part is a prefix of a VARCHAR field, check if it's covered.
+ if (store_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
+ !m_pack_info[i].m_covered && curr_bitmap_pos < MAX_REF_PARTS) {
+ size_t data_length = field->data_length();
+ uint16 key_length;
+ if (m_pk_part_no[i] == (uint)-1) {
+ key_length = tbl->key_info[get_keyno()].key_part[i].length;
+ } else {
+ key_length =
+ tbl->key_info[tbl->s->primary_key].key_part[m_pk_part_no[i]].length;
+ }
+
+ if (m_pack_info[i].m_unpack_func != nullptr &&
+ data_length <= key_length) {
+ bitmap_set_bit(&covered_bitmap, curr_bitmap_pos);
+ }
+ curr_bitmap_pos++;
+ }
+
// Restore field->ptr and field->null_ptr
field->move_field(tbl->record[0] + field_offset,
maybe_null ? tbl->record[0] + null_offset : nullptr,
@@ -669,7 +1153,7 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
}
if (unpack_info) {
- const size_t len = unpack_info->get_current_pos();
+ const size_t len = unpack_info->get_current_pos() - unpack_start_pos;
DBUG_ASSERT(len <= std::numeric_limits<uint16_t>::max());
// Don't store the unpack_info if it has only the header (that is, there's
@@ -677,9 +1161,12 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer,
// Primary Keys are special: for them, store the unpack_info even if it's
// empty (provided m_maybe_unpack_info==true, see
// ha_rocksdb::convert_record_to_storage_format)
- if (len == RDB_UNPACK_HEADER_SIZE &&
- m_index_type != Rdb_key_def::INDEX_TYPE_PRIMARY) {
- unpack_info->clear();
+ if (m_index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) {
+ if (len == get_unpack_header_size(tag) && !covered_bits) {
+ unpack_info->truncate(unpack_start_pos);
+ } else if (store_covered_bitmap) {
+ unpack_info->write_uint16_at(covered_bitmap_pos, covered_bits);
+ }
} else {
unpack_info->write_uint16_at(unpack_len_pos, len);
}
@@ -739,10 +1226,10 @@ uint Rdb_key_def::pack_hidden_pk(const longlong &hidden_pk_id,
Function of type rdb_index_field_pack_t
*/
-void rdb_pack_with_make_sort_key(
+void Rdb_key_def::pack_with_make_sort_key(
Rdb_field_packing *const fpi, Field *const field,
uchar *const buf MY_ATTRIBUTE((__unused__)), uchar **dst,
- Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) {
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const {
DBUG_ASSERT(fpi != nullptr);
DBUG_ASSERT(field != nullptr);
DBUG_ASSERT(dst != nullptr);
@@ -812,9 +1299,9 @@ int Rdb_key_def::compare_keys(const rocksdb::Slice *key1,
const auto before_skip1 = reader1.get_current_ptr();
const auto before_skip2 = reader2.get_current_ptr();
DBUG_ASSERT(fpi->m_skip_func);
- if (fpi->m_skip_func(fpi, nullptr, &reader1))
+ if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader1))
return HA_EXIT_FAILURE;
- if (fpi->m_skip_func(fpi, nullptr, &reader2))
+ if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader2))
return HA_EXIT_FAILURE;
const auto size1 = reader1.get_current_ptr() - before_skip1;
const auto size2 = reader2.get_current_ptr() - before_skip2;
@@ -855,7 +1342,7 @@ size_t Rdb_key_def::key_length(const TABLE *const table,
const Field *field = nullptr;
if (m_index_type != INDEX_TYPE_HIDDEN_PRIMARY)
field = fpi->get_field_in_table(table);
- if (fpi->m_skip_func(fpi, field, &reader))
+ if ((this->*fpi->m_skip_func)(fpi, field, &reader))
return size_t(-1);
}
return key.size() - reader.remaining_bytes();
@@ -887,7 +1374,8 @@ int Rdb_key_def::unpack_field(
}
}
- return fpi->m_unpack_func(fpi, field, field->ptr, reader, unp_reader);
+ return (this->*fpi->m_unpack_func)(fpi, field, field->ptr, reader,
+ unp_reader);
}
/*
@@ -897,8 +1385,8 @@ int Rdb_key_def::unpack_field(
not all indexes support this
@return
- UNPACK_SUCCESS - Ok
- UNPACK_FAILURE - Data format error.
+ HA_EXIT_SUCCESS OK
+ other HA_ERR error code
*/
int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
@@ -918,17 +1406,36 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
// Skip the index number
if ((!reader.read(INDEX_NUMBER_SIZE))) {
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
// For secondary keys, we expect the value field to contain unpack data and
// checksum data in that order. One or both can be missing, but they cannot
// be reordered.
+ const char *unpack_header = unp_reader.get_current_ptr();
const bool has_unpack_info =
- unp_reader.remaining_bytes() &&
- *unp_reader.get_current_ptr() == RDB_UNPACK_DATA_TAG;
- if (has_unpack_info && !unp_reader.read(RDB_UNPACK_HEADER_SIZE)) {
- return HA_EXIT_FAILURE;
+ unp_reader.remaining_bytes() && is_unpack_data_tag(unpack_header[0]);
+ if (has_unpack_info) {
+ if ((m_index_type == INDEX_TYPE_SECONDARY &&
+ m_total_index_flags_length > 0 &&
+ !unp_reader.read(m_total_index_flags_length)) ||
+ !unp_reader.read(get_unpack_header_size(unpack_header[0]))) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
+ }
+ }
+
+ // Read the covered bitmap
+ MY_BITMAP covered_bitmap;
+ my_bitmap_map covered_bits;
+ uint curr_bitmap_pos = 0;
+ bitmap_init(&covered_bitmap, &covered_bits, MAX_REF_PARTS, false);
+
+ const bool has_covered_bitmap =
+ has_unpack_info && (unpack_header[0] == RDB_UNPACK_COVERED_DATA_TAG);
+ if (has_covered_bitmap) {
+ covered_bits = rdb_netbuf_to_uint16((const uchar *)unpack_header +
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) +
+ RDB_UNPACK_COVERED_DATA_LEN_SIZE);
}
for (uint i = 0; i < m_key_parts; i++) {
@@ -941,15 +1448,21 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
if ((secondary_key && hidden_pk_exists && i + 1 == m_key_parts) ||
is_hidden_pk) {
DBUG_ASSERT(fpi->m_unpack_func);
- if (fpi->m_skip_func(fpi, nullptr, &reader)) {
- return HA_EXIT_FAILURE;
+ if ((this->*fpi->m_skip_func)(fpi, nullptr, &reader)) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
continue;
}
Field *const field = fpi->get_field_in_table(table);
- if (fpi->m_unpack_func) {
+ bool covered_column = true;
+ if (has_covered_bitmap && field->real_type() == MYSQL_TYPE_VARCHAR &&
+ !m_pack_info[i].m_covered) {
+ covered_column = curr_bitmap_pos < MAX_REF_PARTS &&
+ bitmap_is_set(&covered_bitmap, curr_bitmap_pos++);
+ }
+ if (fpi->m_unpack_func && covered_column) {
/* It is possible to unpack this column. Do it. */
uint field_offset = field->ptr - table->record[0];
@@ -974,25 +1487,25 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
maybe_null ? table->record[0] + null_offset : nullptr,
field->null_bit);
- if (res) {
- return res;
+ if (res != UNPACK_SUCCESS) {
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
} else {
/* It is impossible to unpack the column. Skip it. */
if (fpi->m_maybe_null) {
const char *nullp;
if (!(nullp = reader.read(1)))
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
if (*nullp == 0) {
/* This is a NULL value */
continue;
}
/* If NULL marker is not '0', it can be only '1' */
if (*nullp != 1)
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
- if (fpi->m_skip_func(fpi, field, &reader))
- return HA_EXIT_FAILURE;
+ if ((this->*fpi->m_skip_func)(fpi, field, &reader))
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
}
@@ -1018,13 +1531,13 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
if (stored_key_chksum != computed_key_chksum) {
report_checksum_mismatch(true, packed_key->data(), packed_key->size());
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
}
if (stored_val_chksum != computed_val_chksum) {
report_checksum_mismatch(false, unpack_info->data(),
unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE);
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_CHECKSUM_MISMATCH;
}
} else {
/* The checksums are present but we are not checking checksums */
@@ -1032,7 +1545,7 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
}
if (reader.remaining_bytes())
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_CORRUPT_DATA;
return HA_EXIT_SUCCESS;
}
@@ -1078,9 +1591,10 @@ bool Rdb_key_def::index_format_min_check(const int &pk_min,
Function of type rdb_index_field_skip_t
*/
-int rdb_skip_max_length(const Rdb_field_packing *const fpi,
- const Field *const field MY_ATTRIBUTE((__unused__)),
- Rdb_string_reader *const reader) {
+int Rdb_key_def::skip_max_length(const Rdb_field_packing *const fpi,
+ const Field *const field
+ MY_ATTRIBUTE((__unused__)),
+ Rdb_string_reader *const reader) const {
if (!reader->read(fpi->m_max_image_len))
return HA_EXIT_FAILURE;
return HA_EXIT_SUCCESS;
@@ -1089,20 +1603,29 @@ int rdb_skip_max_length(const Rdb_field_packing *const fpi,
/*
(RDB_ESCAPE_LENGTH-1) must be an even number so that pieces of lines are not
split in the middle of an UTF-8 character. See the implementation of
- rdb_unpack_binary_or_utf8_varchar.
+ unpack_binary_or_utf8_varchar.
*/
-const uint RDB_ESCAPE_LENGTH = 9;
+#define RDB_ESCAPE_LENGTH 9
+#define RDB_LEGACY_ESCAPE_LENGTH RDB_ESCAPE_LENGTH
static_assert((RDB_ESCAPE_LENGTH - 1) % 2 == 0,
"RDB_ESCAPE_LENGTH-1 must be even.");
+#define RDB_ENCODED_SIZE(len) \
+ ((len + (RDB_ESCAPE_LENGTH - 2)) / (RDB_ESCAPE_LENGTH - 1)) * \
+ RDB_ESCAPE_LENGTH
+
+#define RDB_LEGACY_ENCODED_SIZE(len) \
+ ((len + (RDB_LEGACY_ESCAPE_LENGTH - 1)) / (RDB_LEGACY_ESCAPE_LENGTH - 1)) * \
+ RDB_LEGACY_ESCAPE_LENGTH
+
/*
Function of type rdb_index_field_skip_t
*/
-static int rdb_skip_variable_length(
+int Rdb_key_def::skip_variable_length(
const Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_string_reader *const reader) {
+ const Field *const field, Rdb_string_reader *const reader) const {
const uchar *ptr;
bool finished = false;
@@ -1115,21 +1638,29 @@ static int rdb_skip_variable_length(
dst_len = UINT_MAX;
}
+ bool use_legacy_format = use_legacy_varbinary_format();
+
/* Decode the length-emitted encoding here */
while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) {
- /* See rdb_pack_with_varchar_encoding. */
- const uchar pad =
- 255 - ptr[RDB_ESCAPE_LENGTH - 1]; // number of padding bytes
- const uchar used_bytes = RDB_ESCAPE_LENGTH - 1 - pad;
+ uint used_bytes;
+
+ /* See pack_with_varchar_encoding. */
+ if (use_legacy_format) {
+ used_bytes = calc_unpack_legacy_variable_format(
+ ptr[RDB_ESCAPE_LENGTH - 1], &finished);
+ } else {
+ used_bytes =
+ calc_unpack_variable_format(ptr[RDB_ESCAPE_LENGTH - 1], &finished);
+ }
- if (used_bytes > RDB_ESCAPE_LENGTH - 1 || used_bytes > dst_len) {
- return HA_EXIT_FAILURE; /* cannot store that much, invalid data */
+ if (used_bytes == (uint)-1 || dst_len < used_bytes) {
+ return HA_EXIT_FAILURE; // Corruption in the data
}
- if (used_bytes < RDB_ESCAPE_LENGTH - 1) {
- finished = true;
+ if (finished) {
break;
}
+
dst_len -= used_bytes;
}
@@ -1148,9 +1679,9 @@ const int VARCHAR_CMP_GREATER_THAN_SPACES = 3;
Skip a keypart that uses Variable-Length Space-Padded encoding
*/
-static int rdb_skip_variable_space_pad(const Rdb_field_packing *const fpi,
- const Field *const field,
- Rdb_string_reader *const reader) {
+int Rdb_key_def::skip_variable_space_pad(
+ const Rdb_field_packing *const fpi, const Field *const field,
+ Rdb_string_reader *const reader) const {
const uchar *ptr;
bool finished = false;
@@ -1164,7 +1695,7 @@ static int rdb_skip_variable_space_pad(const Rdb_field_packing *const fpi,
/* Decode the length-emitted encoding here */
while ((ptr = (const uchar *)reader->read(fpi->m_segment_size))) {
- // See rdb_pack_with_varchar_space_pad
+ // See pack_with_varchar_space_pad
const uchar c = ptr[fpi->m_segment_size - 1];
if (c == VARCHAR_CMP_EQUAL_TO_SPACES) {
// This is the last segment
@@ -1192,10 +1723,10 @@ static int rdb_skip_variable_space_pad(const Rdb_field_packing *const fpi,
Function of type rdb_index_field_unpack_t
*/
-int rdb_unpack_integer(Rdb_field_packing *const fpi, Field *const field,
- uchar *const to, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) {
+int Rdb_key_def::unpack_integer(
+ Rdb_field_packing *const fpi, Field *const field, uchar *const to,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
const int length = fpi->m_max_image_len;
const uchar *from;
@@ -1260,10 +1791,11 @@ static void rdb_swap_float_bytes(uchar *const dst, const uchar *const src) {
#define rdb_swap_float_bytes nullptr
#endif
-static int rdb_unpack_floating_point(
+int Rdb_key_def::unpack_floating_point(
uchar *const dst, Rdb_string_reader *const reader, const size_t &size,
const int &exp_digit, const uchar *const zero_pattern,
- const uchar *const zero_val, void (*swap_func)(uchar *, const uchar *)) {
+ const uchar *const zero_val,
+ void (*swap_func)(uchar *, const uchar *)) const {
const uchar *const from = (const uchar *)reader->read(size);
if (from == nullptr)
return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
@@ -1321,17 +1853,17 @@ static int rdb_unpack_floating_point(
Note also that this code assumes that NaN and +/-Infinity are never
allowed in the database.
*/
-static int rdb_unpack_double(
+int Rdb_key_def::unpack_double(
Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
Field *const field MY_ATTRIBUTE((__unused__)), uchar *const field_ptr,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
static double zero_val = 0.0;
static const uchar zero_pattern[8] = {128, 0, 0, 0, 0, 0, 0, 0};
- return rdb_unpack_floating_point(
- field_ptr, reader, sizeof(double), DBL_EXP_DIG, zero_pattern,
- (const uchar *)&zero_val, rdb_swap_double_bytes);
+ return unpack_floating_point(field_ptr, reader, sizeof(double), DBL_EXP_DIG,
+ zero_pattern, (const uchar *)&zero_val,
+ rdb_swap_double_bytes);
}
#if !defined(FLT_EXP_DIG)
@@ -1346,16 +1878,16 @@ static int rdb_unpack_double(
Note also that this code assumes that NaN and +/-Infinity are never
allowed in the database.
*/
-static int rdb_unpack_float(
- Rdb_field_packing *const, Field *const field MY_ATTRIBUTE((__unused__)),
+int Rdb_key_def::unpack_float(
+ Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)),
uchar *const field_ptr, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
static float zero_val = 0.0;
static const uchar zero_pattern[4] = {128, 0, 0, 0};
- return rdb_unpack_floating_point(
- field_ptr, reader, sizeof(float), FLT_EXP_DIG, zero_pattern,
- (const uchar *)&zero_val, rdb_swap_float_bytes);
+ return unpack_floating_point(field_ptr, reader, sizeof(float), FLT_EXP_DIG,
+ zero_pattern, (const uchar *)&zero_val,
+ rdb_swap_float_bytes);
}
/*
@@ -1363,10 +1895,10 @@ static int rdb_unpack_float(
Unpack by doing the reverse action to Field_newdate::make_sort_key.
*/
-int rdb_unpack_newdate(Rdb_field_packing *const fpi, Field *constfield,
- uchar *const field_ptr, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) {
+int Rdb_key_def::unpack_newdate(
+ Rdb_field_packing *const fpi, Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const field_ptr, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
const char *from;
DBUG_ASSERT(fpi->m_max_image_len == 3);
@@ -1385,10 +1917,10 @@ int rdb_unpack_newdate(Rdb_field_packing *const fpi, Field *constfield,
This is for BINARY(n) where the value occupies the whole length.
*/
-static int rdb_unpack_binary_str(
+int Rdb_key_def::unpack_binary_str(
Rdb_field_packing *const fpi, Field *const field, uchar *const to,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
const char *from;
if (!(from = reader->read(fpi->m_max_image_len)))
return UNPACK_FAILURE; /* Mem-comparable image doesn't have enough bytes */
@@ -1403,10 +1935,10 @@ static int rdb_unpack_binary_str(
UTF8 sequences.
*/
-static int rdb_unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field,
- uchar *dst, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader
- MY_ATTRIBUTE((__unused__))) {
+int Rdb_key_def::unpack_utf8_str(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
my_core::CHARSET_INFO *const cset = (my_core::CHARSET_INFO *)field->charset();
const uchar *src;
if (!(src = (const uchar *)reader->read(fpi->m_max_image_len)))
@@ -1431,22 +1963,118 @@ static int rdb_unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field,
}
/*
+ This is the original algorithm to encode a variable binary field. It
+ sets a flag byte every Nth byte. The flag value is (255 - #pad) where
+ #pad is the number of padding bytes that were needed (0 if all N-1
+ bytes were used).
+
+ If N=8 and the field is:
+ * 3 bytes (1, 2, 3) this is encoded as: 1, 2, 3, 0, 0, 0, 0, 251
+ * 4 bytes (1, 2, 3, 0) this is encoded as: 1, 2, 3, 0, 0, 0, 0, 252
+ And the 4 byte string compares as greater than the 3 byte string
+
+ Unfortunately the algorithm has a flaw. If the input is exactly a
+ multiple of N-1, an extra N bytes are written. Since we usually use
+ N=9, an 8 byte input will generate 18 bytes of output instead of the
+ 9 bytes of output that is optimal.
+
+ See pack_variable_format for the newer algorithm.
+*/
+void Rdb_key_def::pack_legacy_variable_format(
+ const uchar *src, // The data to encode
+ size_t src_len, // The length of the data to encode
+ uchar **dst) const // The location to encode the data
+{
+ size_t copy_len;
+ size_t padding_bytes;
+ uchar *ptr = *dst;
+
+ do {
+ copy_len = std::min((size_t)RDB_LEGACY_ESCAPE_LENGTH - 1, src_len);
+ padding_bytes = RDB_LEGACY_ESCAPE_LENGTH - 1 - copy_len;
+ memcpy(ptr, src, copy_len);
+ ptr += copy_len;
+ src += copy_len;
+ // pad with zeros if necessary
+ if (padding_bytes > 0) {
+ memset(ptr, 0, padding_bytes);
+ ptr += padding_bytes;
+ }
+
+ *(ptr++) = 255 - padding_bytes;
+
+ src_len -= copy_len;
+ } while (padding_bytes == 0);
+
+ *dst = ptr;
+}
+
+/*
+ This is the new algorithm. Similarly to the legacy format the input
+ is split up into N-1 bytes and a flag byte is used as the Nth byte
+ in the output.
+
+ - If the previous segment needed any padding the flag is set to the
+ number of bytes used (0..N-2). 0 is possible in the first segment
+ if the input is 0 bytes long.
+ - If no padding was used and there is no more data left in the input
+ the flag is set to N-1
+ - If no padding was used and there is still data left in the input the
+ flag is set to N.
+
+ For N=9, the following input values encode to the specified
+ outout (where 'X' indicates a byte of the original input):
+ - 0 bytes is encoded as 0 0 0 0 0 0 0 0 0
+ - 1 byte is encoded as X 0 0 0 0 0 0 0 1
+ - 2 bytes is encoded as X X 0 0 0 0 0 0 2
+ - 7 bytes is encoded as X X X X X X X 0 7
+ - 8 bytes is encoded as X X X X X X X X 8
+ - 9 bytes is encoded as X X X X X X X X 9 X 0 0 0 0 0 0 0 1
+ - 10 bytes is encoded as X X X X X X X X 9 X X 0 0 0 0 0 0 2
+*/
+void Rdb_key_def::pack_variable_format(
+ const uchar *src, // The data to encode
+ size_t src_len, // The length of the data to encode
+ uchar **dst) const // The location to encode the data
+{
+ uchar *ptr = *dst;
+
+ for (;;) {
+ // Figure out how many bytes to copy, copy them and adjust pointers
+ const size_t copy_len = std::min((size_t)RDB_ESCAPE_LENGTH - 1, src_len);
+ memcpy(ptr, src, copy_len);
+ ptr += copy_len;
+ src += copy_len;
+ src_len -= copy_len;
+
+ // Are we at the end of the input?
+ if (src_len == 0) {
+ // pad with zeros if necessary;
+ const size_t padding_bytes = RDB_ESCAPE_LENGTH - 1 - copy_len;
+ if (padding_bytes > 0) {
+ memset(ptr, 0, padding_bytes);
+ ptr += padding_bytes;
+ }
+
+ // Put the flag byte (0 - N-1) in the output
+ *(ptr++) = (uchar)copy_len;
+ break;
+ }
+
+ // We have more data - put the flag byte (N) in and continue
+ *(ptr++) = RDB_ESCAPE_LENGTH;
+ }
+
+ *dst = ptr;
+}
+
+/*
Function of type rdb_index_field_pack_t
*/
-static void rdb_pack_with_varchar_encoding(
+void Rdb_key_def::pack_with_varchar_encoding(
Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
- Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) {
- /*
- Use a flag byte every Nth byte. Set it to (255 - #pad) where #pad is 0
- when the var length field filled all N-1 previous bytes and #pad is
- otherwise the number of padding bytes used.
-
- If N=8 and the field is:
- * 3 bytes (1, 2, 3) this is encoded as: 1, 2, 3, 0, 0, 0, 0, 251
- * 4 bytes (1, 2, 3, 0) this is encoded as: 1, 2, 3, 0, 0, 0, 0, 252
- And the 4 byte string compares as greater than the 3 byte string
- */
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const {
const CHARSET_INFO *const charset = field->charset();
Field_varstring *const field_var = (Field_varstring *)field;
@@ -1458,26 +2086,11 @@ static void rdb_pack_with_varchar_encoding(
field_var->ptr + field_var->length_bytes, value_length, 0);
/* Got a mem-comparable image in 'buf'. Now, produce varlength encoding */
-
- size_t encoded_size = 0;
- uchar *ptr = *dst;
- while (1) {
- const size_t copy_len = std::min((size_t)RDB_ESCAPE_LENGTH - 1, xfrm_len);
- const size_t padding_bytes = RDB_ESCAPE_LENGTH - 1 - copy_len;
- memcpy(ptr, buf, copy_len);
- ptr += copy_len;
- buf += copy_len;
- // pad with zeros if necessary;
- for (size_t idx = 0; idx < padding_bytes; idx++)
- *(ptr++) = 0;
- *(ptr++) = 255 - (uchar)padding_bytes;
-
- xfrm_len -= copy_len;
- encoded_size += RDB_ESCAPE_LENGTH;
- if (padding_bytes != 0)
- break;
+ if (use_legacy_varbinary_format()) {
+ pack_legacy_variable_format(buf, xfrm_len, dst);
+ } else {
+ pack_variable_format(buf, xfrm_len, dst);
}
- *dst += encoded_size;
}
/*
@@ -1564,16 +2177,15 @@ static const int RDB_TRIMMED_CHARS_OFFSET = 8;
then store it as unsigned.
@seealso
- rdb_unpack_binary_or_utf8_varchar_space_pad
- rdb_unpack_simple_varchar_space_pad
- rdb_dummy_make_unpack_info
- rdb_skip_variable_space_pad
+ unpack_binary_or_utf8_varchar_space_pad
+ unpack_simple_varchar_space_pad
+ dummy_make_unpack_info
+ skip_variable_space_pad
*/
-static void
-rdb_pack_with_varchar_space_pad(Rdb_field_packing *const fpi,
- Field *const field, uchar *buf, uchar **dst,
- Rdb_pack_field_context *const pack_ctx) {
+void Rdb_key_def::pack_with_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx) const {
Rdb_string_writer *const unpack_info = pack_ctx->writer;
const CHARSET_INFO *const charset = field->charset();
const auto field_var = static_cast<Field_varstring *>(field);
@@ -1655,13 +2267,92 @@ rdb_pack_with_varchar_space_pad(Rdb_field_packing *const fpi,
}
/*
+ Calculate the number of used bytes in the chunk and whether this is the
+ last chunk in the input. This is based on the old legacy format - see
+ pack_legacy_variable_format.
+ */
+uint Rdb_key_def::calc_unpack_legacy_variable_format(uchar flag,
+ bool *done) const {
+ uint pad = 255 - flag;
+ uint used_bytes = RDB_LEGACY_ESCAPE_LENGTH - 1 - pad;
+ if (used_bytes > RDB_LEGACY_ESCAPE_LENGTH - 1) {
+ return (uint)-1;
+ }
+
+ *done = used_bytes < RDB_LEGACY_ESCAPE_LENGTH - 1;
+ return used_bytes;
+}
+
+/*
+ Calculate the number of used bytes in the chunk and whether this is the
+ last chunk in the input. This is based on the new format - see
+ pack_variable_format.
+ */
+uint Rdb_key_def::calc_unpack_variable_format(uchar flag, bool *done) const {
+ // Check for invalid flag values
+ if (flag > RDB_ESCAPE_LENGTH) {
+ return (uint)-1;
+ }
+
+ // Values from 1 to N-1 indicate this is the last chunk and that is how
+ // many bytes were used
+ if (flag < RDB_ESCAPE_LENGTH) {
+ *done = true;
+ return flag;
+ }
+
+ // A value of N means we used N-1 bytes and had more to go
+ *done = false;
+ return RDB_ESCAPE_LENGTH - 1;
+}
+
+/*
+ Unpack data that has charset information. Each two bytes of the input is
+ treated as a wide-character and converted to its multibyte equivalent in
+ the output.
+ */
+static int
+unpack_charset(const CHARSET_INFO *cset, // character set information
+ const uchar *src, // source data to unpack
+ uint src_len, // length of source data
+ uchar *dst, // destination of unpacked data
+ uint dst_len, // length of destination data
+ uint *used_bytes) // output number of bytes used
+{
+ if (src_len & 1) {
+ /*
+ UTF-8 characters are encoded into two-byte entities. There is no way
+ we can have an odd number of bytes after encoding.
+ */
+ return UNPACK_FAILURE;
+ }
+
+ uchar *dst_end = dst + dst_len;
+ uint used = 0;
+
+ for (uint ii = 0; ii < src_len; ii += 2) {
+ my_wc_t wc = (src[ii] << 8) | src[ii + 1];
+ int res = cset->cset->wc_mb(cset, wc, dst + used, dst_end);
+ DBUG_ASSERT(res > 0 && res <= 3);
+ if (res < 0) {
+ return UNPACK_FAILURE;
+ }
+
+ used += res;
+ }
+
+ *used_bytes = used;
+ return UNPACK_SUCCESS;
+}
+
+/*
Function of type rdb_index_field_unpack_t
*/
-static int rdb_unpack_binary_or_utf8_varchar(
+int Rdb_key_def::unpack_binary_or_utf8_varchar(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) {
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const {
const uchar *ptr;
size_t len = 0;
bool finished = false;
@@ -1670,64 +2361,51 @@ static int rdb_unpack_binary_or_utf8_varchar(
dst += field_var->length_bytes;
// How much we can unpack
size_t dst_len = field_var->pack_length() - field_var->length_bytes;
- uchar *const dst_end = dst + dst_len;
+
+ bool use_legacy_format = use_legacy_varbinary_format();
/* Decode the length-emitted encoding here */
while ((ptr = (const uchar *)reader->read(RDB_ESCAPE_LENGTH))) {
- /* See rdb_pack_with_varchar_encoding. */
- uchar pad = 255 - ptr[RDB_ESCAPE_LENGTH - 1]; // number of padding bytes
- uchar used_bytes = RDB_ESCAPE_LENGTH - 1 - pad;
+ uint used_bytes;
- if (used_bytes > RDB_ESCAPE_LENGTH - 1) {
- return UNPACK_FAILURE; /* cannot store that much, invalid data */
+ /* See pack_with_varchar_encoding. */
+ if (use_legacy_format) {
+ used_bytes = calc_unpack_legacy_variable_format(
+ ptr[RDB_ESCAPE_LENGTH - 1], &finished);
+ } else {
+ used_bytes =
+ calc_unpack_variable_format(ptr[RDB_ESCAPE_LENGTH - 1], &finished);
}
- if (dst_len < used_bytes) {
- /* Encoded index tuple is longer than the size in the record buffer? */
- return UNPACK_FAILURE;
+ if (used_bytes == (uint)-1 || dst_len < used_bytes) {
+ return UNPACK_FAILURE; // Corruption in the data
}
/*
Now, we need to decode used_bytes of data and append them to the value.
*/
if (fpi->m_varchar_charset->number == COLLATION_UTF8_BIN) {
- if (used_bytes & 1) {
- /*
- UTF-8 characters are encoded into two-byte entities. There is no way
- we can have an odd number of bytes after encoding.
- */
- return UNPACK_FAILURE;
- }
-
- const uchar *src = ptr;
- const uchar *src_end = ptr + used_bytes;
- while (src < src_end) {
- my_wc_t wc = (src[0] << 8) | src[1];
- src += 2;
- const CHARSET_INFO *cset = fpi->m_varchar_charset;
- int res = cset->cset->wc_mb(cset, wc, dst, dst_end);
- DBUG_ASSERT(res > 0 && res <= 3);
- if (res < 0)
- return UNPACK_FAILURE;
- dst += res;
- len += res;
- dst_len -= res;
+ int err = unpack_charset(fpi->m_varchar_charset, ptr, used_bytes, dst,
+ dst_len, &used_bytes);
+ if (err != UNPACK_SUCCESS) {
+ return err;
}
} else {
memcpy(dst, ptr, used_bytes);
- dst += used_bytes;
- dst_len -= used_bytes;
- len += used_bytes;
}
- if (used_bytes < RDB_ESCAPE_LENGTH - 1) {
- finished = true;
+ dst += used_bytes;
+ dst_len -= used_bytes;
+ len += used_bytes;
+
+ if (finished) {
break;
}
}
- if (!finished)
+ if (!finished) {
return UNPACK_FAILURE;
+ }
/* Save the length */
if (field_var->length_bytes == 1) {
@@ -1741,14 +2419,15 @@ static int rdb_unpack_binary_or_utf8_varchar(
/*
@seealso
- rdb_pack_with_varchar_space_pad - packing function
- rdb_unpack_simple_varchar_space_pad - unpacking function for 'simple'
+ pack_with_varchar_space_pad - packing function
+ unpack_simple_varchar_space_pad - unpacking function for 'simple'
charsets.
- rdb_skip_variable_space_pad - skip function
+ skip_variable_space_pad - skip function
*/
-static int rdb_unpack_binary_or_utf8_varchar_space_pad(
+int Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad(
Rdb_field_packing *const fpi, Field *const field, uchar *dst,
- Rdb_string_reader *const reader, Rdb_string_reader *const unp_reader) {
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const {
const uchar *ptr;
size_t len = 0;
bool finished = false;
@@ -1855,9 +2534,9 @@ static int rdb_unpack_binary_or_utf8_varchar_space_pad(
Function of type rdb_make_unpack_info_t
*/
-static void rdb_make_unpack_unknown(
+void Rdb_key_def::make_unpack_unknown(
const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_pack_field_context *const pack_ctx) {
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) const {
pack_ctx->writer->write(field->ptr, field->pack_length());
}
@@ -1866,25 +2545,28 @@ static void rdb_make_unpack_unknown(
available.
The actual unpack_info data is produced by the function that packs the key,
- that is, rdb_pack_with_varchar_space_pad.
+ that is, pack_with_varchar_space_pad.
*/
-static void rdb_dummy_make_unpack_info(
+void Rdb_key_def::dummy_make_unpack_info(
const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
const Field *field MY_ATTRIBUTE((__unused__)),
- Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) {}
+ Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) const {
+ // Do nothing
+}
/*
Function of type rdb_index_field_unpack_t
*/
-static int rdb_unpack_unknown(Rdb_field_packing *const fpi, Field *const field,
- uchar *const dst, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) {
+int Rdb_key_def::unpack_unknown(Rdb_field_packing *const fpi,
+ Field *const field, uchar *const dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const {
const uchar *ptr;
const uint len = fpi->m_unpack_data_len;
// We don't use anything from the key, so skip over it.
- if (rdb_skip_max_length(fpi, field, reader)) {
+ if (skip_max_length(fpi, field, reader)) {
return UNPACK_FAILURE;
}
@@ -1901,9 +2583,9 @@ static int rdb_unpack_unknown(Rdb_field_packing *const fpi, Field *const field,
Function of type rdb_make_unpack_info_t
*/
-static void rdb_make_unpack_unknown_varchar(
+void Rdb_key_def::make_unpack_unknown_varchar(
const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)),
- const Field *const field, Rdb_pack_field_context *const pack_ctx) {
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) const {
const auto f = static_cast<const Field_varstring *>(field);
uint len = f->length_bytes == 1 ? (uint)*f->ptr : uint2korr(f->ptr);
len += f->length_bytes;
@@ -1921,20 +2603,20 @@ static void rdb_make_unpack_unknown_varchar(
the original string, so we keep the whole original string in the unpack_info.
@seealso
- rdb_make_unpack_unknown, rdb_unpack_unknown
+ make_unpack_unknown, unpack_unknown
*/
-static int rdb_unpack_unknown_varchar(Rdb_field_packing *const fpi,
- Field *const field, uchar *dst,
- Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) {
+int Rdb_key_def::unpack_unknown_varchar(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const {
const uchar *ptr;
uchar *const d0 = dst;
const auto f = static_cast<Field_varstring *>(field);
dst += f->length_bytes;
const uint len_bytes = f->length_bytes;
// We don't use anything from the key, so skip over it.
- if (fpi->m_skip_func(fpi, field, reader)) {
+ if ((this->*fpi->m_skip_func)(fpi, field, reader)) {
return UNPACK_FAILURE;
}
@@ -1992,10 +2674,9 @@ static uint rdb_read_unpack_simple(Rdb_bit_reader *const reader,
Make unpack_data for VARCHAR(n) in a "simple" charset.
*/
-static void
-rdb_make_unpack_simple_varchar(const Rdb_collation_codec *const codec,
- const Field *const field,
- Rdb_pack_field_context *const pack_ctx) {
+void Rdb_key_def::make_unpack_simple_varchar(
+ const Rdb_collation_codec *const codec, const Field *const field,
+ Rdb_pack_field_context *const pack_ctx) const {
const auto f = static_cast<const Field_varstring *>(field);
uchar *const src = f->ptr + f->length_bytes;
const size_t src_len =
@@ -2011,14 +2692,14 @@ rdb_make_unpack_simple_varchar(const Rdb_collation_codec *const codec,
Function of type rdb_index_field_unpack_t
@seealso
- rdb_pack_with_varchar_space_pad - packing function
- rdb_unpack_binary_or_utf8_varchar_space_pad - a similar unpacking function
+ pack_with_varchar_space_pad - packing function
+ unpack_binary_or_utf8_varchar_space_pad - a similar unpacking function
*/
-int rdb_unpack_simple_varchar_space_pad(Rdb_field_packing *const fpi,
- Field *const field, uchar *dst,
- Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) {
+int Rdb_key_def::unpack_simple_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const {
const uchar *ptr;
size_t len = 0;
bool finished = false;
@@ -2116,12 +2797,12 @@ int rdb_unpack_simple_varchar_space_pad(Rdb_field_packing *const fpi,
It is CHAR(N), so SQL layer has padded the value with spaces up to N chars.
@seealso
- The VARCHAR variant is in rdb_make_unpack_simple_varchar
+ The VARCHAR variant is in make_unpack_simple_varchar
*/
-static void rdb_make_unpack_simple(const Rdb_collation_codec *const codec,
- const Field *const field,
- Rdb_pack_field_context *const pack_ctx) {
+void Rdb_key_def::make_unpack_simple(
+ const Rdb_collation_codec *const codec, const Field *const field,
+ Rdb_pack_field_context *const pack_ctx) const {
const uchar *const src = field->ptr;
Rdb_bit_writer bit_writer(pack_ctx->writer);
rdb_write_unpack_simple(&bit_writer, codec, src, field->pack_length());
@@ -2131,10 +2812,11 @@ static void rdb_make_unpack_simple(const Rdb_collation_codec *const codec,
Function of type rdb_index_field_unpack_t
*/
-static int rdb_unpack_simple(Rdb_field_packing *const fpi,
- Field *const field MY_ATTRIBUTE((__unused__)),
- uchar *const dst, Rdb_string_reader *const reader,
- Rdb_string_reader *const unp_reader) {
+int Rdb_key_def::unpack_simple(Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const {
const uchar *ptr;
const uint len = fpi->m_max_image_len;
Rdb_bit_reader bit_reader(unp_reader);
@@ -2281,9 +2963,10 @@ rdb_init_collation_mapping(const my_core::CHARSET_INFO *const cs) {
}
cur->m_make_unpack_info_func = {
- {rdb_make_unpack_simple_varchar, rdb_make_unpack_simple}};
- cur->m_unpack_func = {
- {rdb_unpack_simple_varchar_space_pad, rdb_unpack_simple}};
+ &Rdb_key_def::make_unpack_simple_varchar,
+ &Rdb_key_def::make_unpack_simple};
+ cur->m_unpack_func = {&Rdb_key_def::unpack_simple_varchar_space_pad,
+ &Rdb_key_def::unpack_simple};
} else {
// Out of luck for now.
}
@@ -2370,8 +3053,10 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
/* Calculate image length. By default, is is pack_length() */
m_max_image_len =
field ? field->pack_length() : ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN;
- m_skip_func = rdb_skip_max_length;
- m_pack_func = rdb_pack_with_make_sort_key;
+ m_skip_func = &Rdb_key_def::skip_max_length;
+ m_pack_func = &Rdb_key_def::pack_with_make_sort_key;
+
+ m_covered = false;
switch (type) {
case MYSQL_TYPE_LONGLONG:
@@ -2379,15 +3064,18 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
case MYSQL_TYPE_INT24:
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_TINY:
- m_unpack_func = rdb_unpack_integer;
+ m_unpack_func = &Rdb_key_def::unpack_integer;
+ m_covered = true;
return true;
case MYSQL_TYPE_DOUBLE:
- m_unpack_func = rdb_unpack_double;
+ m_unpack_func = &Rdb_key_def::unpack_double;
+ m_covered = true;
return true;
case MYSQL_TYPE_FLOAT:
- m_unpack_func = rdb_unpack_float;
+ m_unpack_func = &Rdb_key_def::unpack_float;
+ m_covered = true;
return true;
case MYSQL_TYPE_NEWDECIMAL:
@@ -2404,7 +3092,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
case MYSQL_TYPE_TIME2: /* TIME is packed with Field_timef::make_sort_key */
case MYSQL_TYPE_YEAR: /* YEAR is packed with Field_tiny::make_sort_key */
/* Everything that comes here is packed with just a memcpy(). */
- m_unpack_func = rdb_unpack_binary_str;
+ m_unpack_func = &Rdb_key_def::unpack_binary_str;
+ m_covered = true;
return true;
case MYSQL_TYPE_NEWDATE:
@@ -2413,7 +3102,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
3 bytes, and packing is done by swapping the byte order (for both big-
and little-endian)
*/
- m_unpack_func = rdb_unpack_newdate;
+ m_unpack_func = &Rdb_key_def::unpack_newdate;
+ m_covered = true;
return true;
case MYSQL_TYPE_TINY_BLOB:
case MYSQL_TYPE_MEDIUM_BLOB:
@@ -2460,10 +3150,15 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// The default for varchar is variable-length, without space-padding for
// comparisons
m_varchar_charset = cs;
- m_skip_func = rdb_skip_variable_length;
- m_pack_func = rdb_pack_with_varchar_encoding;
- m_max_image_len =
- (m_max_image_len / (RDB_ESCAPE_LENGTH - 1) + 1) * RDB_ESCAPE_LENGTH;
+ m_skip_func = &Rdb_key_def::skip_variable_length;
+ m_pack_func = &Rdb_key_def::pack_with_varchar_encoding;
+ if (!key_descr || key_descr->use_legacy_varbinary_format()) {
+ m_max_image_len = RDB_LEGACY_ENCODED_SIZE(m_max_image_len);
+ } else {
+ // Calculate the maximum size of the short section plus the
+ // maximum size of the long section
+ m_max_image_len = RDB_ENCODED_SIZE(m_max_image_len);
+ }
const auto field_var = static_cast<const Field_varstring *>(field);
m_unpack_info_uses_two_bytes = (field_var->field_length + 8 >= 0x100);
@@ -2481,8 +3176,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// - For VARBINARY(N), values may have different lengths, so we're using
// variable-length encoding. This is also the only charset where the
// values are not space-padded for comparison.
- m_unpack_func = is_varchar ? rdb_unpack_binary_or_utf8_varchar
- : rdb_unpack_binary_str;
+ m_unpack_func = is_varchar ? &Rdb_key_def::unpack_binary_or_utf8_varchar
+ : &Rdb_key_def::unpack_binary_str;
res = true;
} else if (cs->number == COLLATION_LATIN1_BIN || cs->number == COLLATION_UTF8_BIN) {
// For _bin collations, mem-comparable form of the string is the string
@@ -2492,10 +3187,10 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// VARCHARs - are compared as if they were space-padded - but are
// not actually space-padded (reading the value back produces the
// original value, without the padding)
- m_unpack_func = rdb_unpack_binary_or_utf8_varchar_space_pad;
- m_skip_func = rdb_skip_variable_space_pad;
- m_pack_func = rdb_pack_with_varchar_space_pad;
- m_make_unpack_info_func = rdb_dummy_make_unpack_info;
+ m_unpack_func = &Rdb_key_def::unpack_binary_or_utf8_varchar_space_pad;
+ m_skip_func = &Rdb_key_def::skip_variable_space_pad;
+ m_pack_func = &Rdb_key_def::pack_with_varchar_space_pad;
+ m_make_unpack_info_func = &Rdb_key_def::dummy_make_unpack_info;
m_segment_size = get_segment_size_from_collation(cs);
m_max_image_len =
(max_image_len_before_chunks / (m_segment_size - 1) + 1) *
@@ -2505,8 +3200,9 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
} else {
// SQL layer pads CHAR(N) values to their maximum length.
// We just store that and restore it back.
- m_unpack_func = (cs->number == COLLATION_LATIN1_BIN) ? rdb_unpack_binary_str
- : rdb_unpack_utf8_str;
+ m_unpack_func = (cs->number == COLLATION_LATIN1_BIN) ?
+ &Rdb_key_def::unpack_binary_str
+ : &Rdb_key_def::unpack_utf8_str;
}
res = true;
} else {
@@ -2528,8 +3224,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// Currently we handle these collations as NO_PAD, even if they have
// PAD_SPACE attribute.
if (cs->levels_for_order == 1) {
- m_pack_func = rdb_pack_with_varchar_space_pad;
- m_skip_func = rdb_skip_variable_space_pad;
+ m_pack_func = &Rdb_key_def::pack_with_varchar_space_pad;
+ m_skip_func = &Rdb_key_def::skip_variable_space_pad;
m_segment_size = get_segment_size_from_collation(cs);
m_max_image_len =
(max_image_len_before_chunks / (m_segment_size - 1) + 1) *
@@ -2544,8 +3240,8 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// NO_LINT_DEBUG
sql_print_warning("MyRocks will handle this collation internally "
" as if it had a NO_PAD attribute.");
- m_pack_func = rdb_pack_with_varchar_encoding;
- m_skip_func = rdb_skip_variable_length;
+ m_pack_func = &Rdb_key_def::pack_with_varchar_encoding;
+ m_skip_func = &Rdb_key_def::skip_variable_length;
}
}
@@ -2561,10 +3257,11 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
// form. Our way of restoring the original value is to keep a copy of
// the original value in unpack_info.
m_unpack_info_stores_value = true;
- m_make_unpack_info_func = is_varchar ? rdb_make_unpack_unknown_varchar
- : rdb_make_unpack_unknown;
- m_unpack_func =
- is_varchar ? rdb_unpack_unknown_varchar : rdb_unpack_unknown;
+ m_make_unpack_info_func =
+ is_varchar ? &Rdb_key_def::make_unpack_unknown_varchar
+ : &Rdb_key_def::make_unpack_unknown;
+ m_unpack_func = is_varchar ? &Rdb_key_def::unpack_unknown_varchar
+ : &Rdb_key_def::unpack_unknown;
} else {
// Same as above: we don't know how to restore the value from its
// mem-comparable form.
@@ -2575,27 +3272,36 @@ bool Rdb_field_packing::setup(const Rdb_key_def *const key_descr,
}
}
- // Make an adjustment: unpacking partially covered columns is not
- // possible. field->table is populated when called through
- // Rdb_key_def::setup, but not during ha_rocksdb::index_flags.
+ // Make an adjustment: if this column is partially covered, tell the SQL
+ // layer we can't do index-only scans. Later when we perform an index read,
+ // we'll check on a record-by-record basis if we can do an index-only scan
+ // or not.
+ uint field_length;
if (field->table) {
- // Get the original Field object and compare lengths. If this key part is
- // a prefix of a column, then we can't do index-only scans.
- if (field->table->field[field->field_index]->field_length != key_length) {
- m_unpack_func = nullptr;
- m_make_unpack_info_func = nullptr;
- m_unpack_info_stores_value = true;
- res = false;
- }
+ field_length = field->table->field[field->field_index]->field_length;
} else {
- if (field->field_length != key_length) {
+ field_length = field->field_length;
+ }
+
+ if (field_length != key_length) {
+ res = false;
+ // If this index doesn't support covered bitmaps, then we won't know
+ // during a read if the column is actually covered or not. If so, we need
+ // to assume the column isn't covered and skip it during unpacking.
+ //
+ // If key_descr == NULL, then this is a dummy field and we probably don't
+ // need to perform this step. However, to preserve the behavior before
+ // this change, we'll only skip this step if we have an index which
+ // supports covered bitmaps.
+ if (!key_descr || !key_descr->use_covered_bitmap_format()) {
m_unpack_func = nullptr;
m_make_unpack_info_func = nullptr;
m_unpack_info_stores_value = true;
- res = false;
}
}
}
+
+ m_covered = res;
return res;
}
@@ -2659,7 +3365,6 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
uchar flags =
(kd.m_is_reverse_cf ? Rdb_key_def::REVERSE_CF_FLAG : 0) |
- (kd.m_is_auto_cf ? Rdb_key_def::AUTO_CF_FLAG : 0) |
(kd.m_is_per_partition_cf ? Rdb_key_def::PER_PARTITION_CF_FLAG : 0);
const uint cf_id = kd.get_cf()->GetID();
@@ -2681,11 +3386,8 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
flags &= ~Rdb_key_def::CF_FLAGS_TO_IGNORE;
if (existing_cf_flags != flags) {
- my_printf_error(ER_UNKNOWN_ERROR,
- "Column family ('%s') flag (%d) is different from an "
- "existing flag (%d). Assign a new CF flag, or do not "
- "change existing CF flag.", MYF(0), cf_name.c_str(),
- flags, existing_cf_flags);
+ my_error(ER_CF_DIFFERENT, MYF(0), cf_name.c_str(), flags,
+ existing_cf_flags);
return true;
}
} else {
@@ -2694,9 +3396,16 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
rdb_netstr_append_uint32(&indexes, cf_id);
rdb_netstr_append_uint32(&indexes, kd.m_index_number);
- dict->add_or_update_index_cf_mapping(batch, kd.m_index_type,
- kd.m_kv_format_version,
- kd.m_index_number, cf_id);
+
+ struct Rdb_index_info index_info;
+ index_info.m_gl_index_id = {cf_id, kd.m_index_number};
+ index_info.m_index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST;
+ index_info.m_index_type = kd.m_index_type;
+ index_info.m_kv_version = kd.m_kv_format_version;
+ index_info.m_index_flags = kd.m_index_flags_bitmap;
+ index_info.m_ttl_duration = kd.m_ttl_duration;
+
+ dict->add_or_update_index_cf_mapping(batch, &index_info);
}
const rocksdb::Slice skey((char *)key, keylen);
@@ -2706,6 +3415,51 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
return false;
}
+// Length that each index flag takes inside the record.
+// Each index in the array maps to the enum INDEX_FLAG
+static const std::array<uint, 1> index_flag_lengths = {
+ {ROCKSDB_SIZEOF_TTL_RECORD}};
+
+bool Rdb_key_def::has_index_flag(uint32 index_flags, enum INDEX_FLAG flag) {
+ return flag & index_flags;
+}
+
+uint32 Rdb_key_def::calculate_index_flag_offset(uint32 index_flags,
+ enum INDEX_FLAG flag,
+ uint *const length) {
+
+ DBUG_ASSERT_IMP(flag != MAX_FLAG,
+ Rdb_key_def::has_index_flag(index_flags, flag));
+
+ uint offset = 0;
+ for (size_t bit = 0; bit < sizeof(index_flags) * CHAR_BIT; ++bit) {
+ int mask = 1 << bit;
+
+ /* Exit once we've reached the proper flag */
+ if (flag & mask) {
+ if (length != nullptr) {
+ *length = index_flag_lengths[bit];
+ }
+ break;
+ }
+
+ if (index_flags & mask) {
+ offset += index_flag_lengths[bit];
+ }
+ }
+
+ return offset;
+}
+
+void Rdb_key_def::write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const {
+ uint len;
+ uint offset = calculate_index_flag_offset(m_index_flags_bitmap, flag, &len);
+ DBUG_ASSERT(offset + len <= buf->get_current_pos());
+ memcpy(buf->ptr() + offset, val, len);
+}
+
void Rdb_tbl_def::check_if_is_mysql_system_table() {
static const char *const system_dbs[] = {
"mysql", "performance_schema", "information_schema",
@@ -2804,10 +3558,12 @@ struct Rdb_validate_tbls : public Rdb_tables_scanner {
int Rdb_validate_tbls::add_table(Rdb_tbl_def *tdef) {
DBUG_ASSERT(tdef != nullptr);
- /* Add the database/table into the list */
- bool is_partition = tdef->base_partition().size() != 0;
- m_list[tdef->base_dbname()].insert(
- tbl_info_t(tdef->base_tablename(), is_partition));
+ /* Add the database/table into the list if it is not a temp table */
+ if (tdef->base_tablename().find(tmp_file_prefix) == std::string::npos) {
+ bool is_partition = tdef->base_partition().size() != 0;
+ m_list[tdef->base_dbname()].insert(
+ tbl_info_t(tdef->base_tablename(), is_partition));
+ }
return HA_EXIT_SUCCESS;
}
@@ -2890,9 +3646,9 @@ bool Rdb_validate_tbls::scan_for_frms(const std::string &datadir,
/* Scan through the files in the directory */
struct fileinfo *file_info = dir_info->dir_entry;
for (uint ii = 0; ii < dir_info->number_of_files; ii++, file_info++) {
- /* Find .frm files that are not temp files (those that start with '#') */
+ /* Find .frm files that are not temp files (those that contain '#sql') */
const char *ext = strrchr(file_info->name, '.');
- if (ext != nullptr && !is_prefix(file_info->name, tmp_file_prefix) &&
+ if (ext != nullptr && strstr(file_info->name, tmp_file_prefix) == nullptr &&
strcmp(ext, ".frm") == 0) {
std::string tablename =
std::string(file_info->name, ext - file_info->name);
@@ -2957,7 +3713,7 @@ bool Rdb_validate_tbls::compare_to_actual_tables(const std::string &datadir,
/*
Validate that all the tables in the RocksDB database dictionary match the .frm
- files in the datdir
+ files in the datadir
*/
bool Rdb_ddl_manager::validate_schemas(void) {
bool has_errors = false;
@@ -3036,7 +3792,7 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
// Now, read the DDLs.
const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
- if (real_val_size % Rdb_key_def::PACKED_SIZE * 2) {
+ if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
@@ -3056,12 +3812,9 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
for (uint keyno = 0; ptr < ptr_end; keyno++) {
GL_INDEX_ID gl_index_id;
rdb_netbuf_read_gl_index(&ptr, &gl_index_id);
- uint16 m_index_dict_version = 0;
- uchar m_index_type = 0;
- uint16 kv_version = 0;
uint flags = 0;
- if (!m_dict->get_index_info(gl_index_id, &m_index_dict_version,
- &m_index_type, &kv_version)) {
+ struct Rdb_index_info index_info;
+ if (!m_dict->get_index_info(gl_index_id, &index_info)) {
sql_print_error("RocksDB: Could not get index information "
"for Index Number (%u,%u), table %s",
gl_index_id.cf_id, gl_index_id.index_id,
@@ -3082,21 +3835,37 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
return true;
}
+ if ((flags & Rdb_key_def::AUTO_CF_FLAG) != 0) {
+ // The per-index cf option is deprecated. Make sure we don't have the
+ // flag set in any existing database. NO_LINT_DEBUG
+ sql_print_error("RocksDB: The defunct AUTO_CF_FLAG is enabled for CF "
+ "number %d, table %s",
+ gl_index_id.cf_id, tdef->full_tablename().c_str());
+ }
+
rocksdb::ColumnFamilyHandle *const cfh =
cf_manager->get_cf(gl_index_id.cf_id);
DBUG_ASSERT(cfh != nullptr);
+ uint32 ttl_rec_offset =
+ Rdb_key_def::has_index_flag(index_info.m_index_flags,
+ Rdb_key_def::TTL_FLAG)
+ ? Rdb_key_def::calculate_index_flag_offset(
+ index_info.m_index_flags, Rdb_key_def::TTL_FLAG)
+ : UINT_MAX;
+
/*
We can't fully initialize Rdb_key_def object here, because full
initialization requires that there is an open TABLE* where we could
look at Field* objects and set max_length and other attributes
*/
tdef->m_key_descr_arr[keyno] = std::make_shared<Rdb_key_def>(
- gl_index_id.index_id, keyno, cfh, m_index_dict_version, m_index_type,
- kv_version, flags & Rdb_key_def::REVERSE_CF_FLAG,
- flags & Rdb_key_def::AUTO_CF_FLAG,
+ gl_index_id.index_id, keyno, cfh, index_info.m_index_dict_version,
+ index_info.m_index_type, index_info.m_kv_version,
+ flags & Rdb_key_def::REVERSE_CF_FLAG,
flags & Rdb_key_def::PER_PARTITION_CF_FLAG, "",
- m_dict->get_stats(gl_index_id));
+ m_dict->get_stats(gl_index_id), index_info.m_index_flags,
+ ttl_rec_offset, index_info.m_ttl_duration);
}
put(tdef);
i++;
@@ -3123,8 +3892,7 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
m_sequence.init(max_index_id_in_dict + 1);
if (!it->status().ok()) {
- const std::string s = it->status().ToString();
- sql_print_error("RocksDB: Table_store: load error: %s", s.c_str());
+ rdb_log_status_error(it->status(), "Table_store load error");
return true;
}
delete it;
@@ -3207,6 +3975,20 @@ Rdb_ddl_manager::find(GL_INDEX_ID gl_index_id) {
return empty;
}
+// this method returns the name of the table based on an index id. It acquires
+// a read lock on m_rwlock.
+const std::string
+Rdb_ddl_manager::safe_get_table_name(const GL_INDEX_ID &gl_index_id) {
+ std::string ret;
+ mysql_rwlock_rdlock(&m_rwlock);
+ auto it = m_index_num_to_keydef.find(gl_index_id);
+ if (it != m_index_num_to_keydef.end()) {
+ ret = it->second.first;
+ }
+ mysql_rwlock_unlock(&m_rwlock);
+ return ret;
+}
+
void Rdb_ddl_manager::set_stats(
const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats) {
mysql_rwlock_wrlock(&m_rwlock);
@@ -3653,19 +4435,40 @@ void Rdb_binlog_manager::update_slave_gtid_info(
bool Rdb_dict_manager::init(rocksdb::DB *const rdb_dict,
Rdb_cf_manager *const cf_manager) {
+ DBUG_ASSERT(rdb_dict != nullptr);
+ DBUG_ASSERT(cf_manager != nullptr);
+
mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
+
m_db = rdb_dict;
- bool is_automatic;
- m_system_cfh = cf_manager->get_or_create_cf(m_db, DEFAULT_SYSTEM_CF_NAME, "",
- nullptr, &is_automatic);
+
+ m_system_cfh = cf_manager->get_or_create_cf(m_db, DEFAULT_SYSTEM_CF_NAME);
+ rocksdb::ColumnFamilyHandle *default_cfh =
+ cf_manager->get_cf(DEFAULT_CF_NAME);
+
+ // System CF and default CF should be initialized
+ if (m_system_cfh == nullptr || default_cfh == nullptr) {
+ return HA_EXIT_FAILURE;
+ }
+
rdb_netbuf_store_index(m_key_buf_max_index_id, Rdb_key_def::MAX_INDEX_ID);
+
m_key_slice_max_index_id =
rocksdb::Slice(reinterpret_cast<char *>(m_key_buf_max_index_id),
Rdb_key_def::INDEX_NUMBER_SIZE);
+
resume_drop_indexes();
rollback_ongoing_index_creation();
- return (m_system_cfh == nullptr);
+ // Initialize system CF and default CF flags
+ const std::unique_ptr<rocksdb::WriteBatch> wb = begin();
+ rocksdb::WriteBatch *const batch = wb.get();
+
+ add_cf_flags(batch, m_system_cfh->GetID(), 0);
+ add_cf_flags(batch, default_cfh->GetID(), 0);
+ commit(batch);
+
+ return HA_EXIT_SUCCESS;
}
std::unique_ptr<rocksdb::WriteBatch> Rdb_dict_manager::begin() const {
@@ -3700,8 +4503,8 @@ rocksdb::Iterator *Rdb_dict_manager::new_iterator() const {
int Rdb_dict_manager::commit(rocksdb::WriteBatch *const batch,
const bool &sync) const {
if (!batch)
- return HA_EXIT_FAILURE;
- int res = 0;
+ return HA_ERR_ROCKSDB_COMMIT_FAILED;
+ int res = HA_EXIT_SUCCESS;
rocksdb::WriteOptions options;
options.sync = sync;
rocksdb::Status s = m_db->Write(options, batch);
@@ -3734,22 +4537,23 @@ void Rdb_dict_manager::delete_with_prefix(
}
void Rdb_dict_manager::add_or_update_index_cf_mapping(
- rocksdb::WriteBatch *batch, const uchar m_index_type,
- const uint16_t kv_version, const uint32_t index_id,
- const uint32_t cf_id) const {
+ rocksdb::WriteBatch *batch, struct Rdb_index_info *const index_info) const {
uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 3] = {0};
uchar value_buf[256] = {0};
- GL_INDEX_ID gl_index_id = {cf_id, index_id};
- dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, gl_index_id);
+ dump_index_id(key_buf, Rdb_key_def::INDEX_INFO, index_info->m_gl_index_id);
const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
uchar *ptr = value_buf;
rdb_netbuf_store_uint16(ptr, Rdb_key_def::INDEX_INFO_VERSION_LATEST);
- ptr += 2;
- rdb_netbuf_store_byte(ptr, m_index_type);
- ptr += 1;
- rdb_netbuf_store_uint16(ptr, kv_version);
- ptr += 2;
+ ptr += RDB_SIZEOF_INDEX_INFO_VERSION;
+ rdb_netbuf_store_byte(ptr, index_info->m_index_type);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ rdb_netbuf_store_uint16(ptr, index_info->m_kv_version);
+ ptr += RDB_SIZEOF_KV_VERSION;
+ rdb_netbuf_store_uint32(ptr, index_info->m_index_flags);
+ ptr += RDB_SIZEOF_INDEX_FLAGS;
+ rdb_netbuf_store_uint64(ptr, index_info->m_ttl_duration);
+ ptr += ROCKSDB_SIZEOF_TTL_RECORD;
const rocksdb::Slice value =
rocksdb::Slice((char *)value_buf, ptr - value_buf);
@@ -3759,6 +4563,8 @@ void Rdb_dict_manager::add_or_update_index_cf_mapping(
void Rdb_dict_manager::add_cf_flags(rocksdb::WriteBatch *const batch,
const uint32_t &cf_id,
const uint32_t &cf_flags) const {
+ DBUG_ASSERT(batch != nullptr);
+
uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0};
uchar value_buf[Rdb_key_def::VERSION_SIZE + Rdb_key_def::INDEX_NUMBER_SIZE] =
{0};
@@ -3779,10 +4585,12 @@ void Rdb_dict_manager::delete_index_info(rocksdb::WriteBatch *batch,
delete_with_prefix(batch, Rdb_key_def::INDEX_STATISTICS, gl_index_id);
}
-bool Rdb_dict_manager::get_index_info(const GL_INDEX_ID &gl_index_id,
- uint16_t *m_index_dict_version,
- uchar *m_index_type,
- uint16_t *kv_version) const {
+bool Rdb_dict_manager::get_index_info(
+ const GL_INDEX_ID &gl_index_id,
+ struct Rdb_index_info *const index_info) const {
+
+ index_info->m_gl_index_id = gl_index_id;
+
bool found = false;
bool error = false;
std::string value;
@@ -3794,17 +4602,50 @@ bool Rdb_dict_manager::get_index_info(const GL_INDEX_ID &gl_index_id,
if (status.ok()) {
const uchar *const val = (const uchar *)value.c_str();
const uchar *ptr = val;
- *m_index_dict_version = rdb_netbuf_to_uint16(val);
- *kv_version = 0;
- *m_index_type = 0;
- ptr += 2;
- switch (*m_index_dict_version) {
+ index_info->m_index_dict_version = rdb_netbuf_to_uint16(val);
+ ptr += RDB_SIZEOF_INDEX_INFO_VERSION;
+
+ switch (index_info->m_index_dict_version) {
+ case Rdb_key_def::INDEX_INFO_VERSION_FIELD_FLAGS:
+ /* Sanity check to prevent reading bogus TTL record. */
+ if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
+ RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
+ RDB_SIZEOF_INDEX_FLAGS +
+ ROCKSDB_SIZEOF_TTL_RECORD) {
+ error = true;
+ break;
+ }
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ ptr += RDB_SIZEOF_KV_VERSION;
+ index_info->m_index_flags = rdb_netbuf_to_uint32(ptr);
+ ptr += RDB_SIZEOF_INDEX_FLAGS;
+ index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ found = true;
+ break;
+
+ case Rdb_key_def::INDEX_INFO_VERSION_TTL:
+ /* Sanity check to prevent reading bogus TTL record. */
+ if (value.size() != RDB_SIZEOF_INDEX_INFO_VERSION +
+ RDB_SIZEOF_INDEX_TYPE + RDB_SIZEOF_KV_VERSION +
+ ROCKSDB_SIZEOF_TTL_RECORD) {
+ error = true;
+ break;
+ }
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
+ ptr += RDB_SIZEOF_KV_VERSION;
+ index_info->m_ttl_duration = rdb_netbuf_to_uint64(ptr);
+ found = true;
+ break;
case Rdb_key_def::INDEX_INFO_VERSION_VERIFY_KV_FORMAT:
case Rdb_key_def::INDEX_INFO_VERSION_GLOBAL_ID:
- *m_index_type = rdb_netbuf_to_byte(ptr);
- ptr += 1;
- *kv_version = rdb_netbuf_to_uint16(ptr);
+ index_info->m_index_type = rdb_netbuf_to_byte(ptr);
+ ptr += RDB_SIZEOF_INDEX_TYPE;
+ index_info->m_kv_version = rdb_netbuf_to_uint16(ptr);
found = true;
break;
@@ -3813,14 +4654,16 @@ bool Rdb_dict_manager::get_index_info(const GL_INDEX_ID &gl_index_id,
break;
}
- switch (*m_index_type) {
+ switch (index_info->m_index_type) {
case Rdb_key_def::INDEX_TYPE_PRIMARY:
case Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY: {
- error = *kv_version > Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST;
+ error =
+ index_info->m_kv_version > Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST;
break;
}
case Rdb_key_def::INDEX_TYPE_SECONDARY:
- error = *kv_version > Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST;
+ error = index_info->m_kv_version >
+ Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST;
break;
default:
error = true;
@@ -3830,10 +4673,12 @@ bool Rdb_dict_manager::get_index_info(const GL_INDEX_ID &gl_index_id,
if (error) {
// NO_LINT_DEBUG
- sql_print_error("RocksDB: Found invalid key version number (%u, %u, %u) "
- "from data dictionary. This should never happen "
- "and it may be a bug.",
- *m_index_dict_version, *m_index_type, *kv_version);
+ sql_print_error(
+ "RocksDB: Found invalid key version number (%u, %u, %u, %llu) "
+ "from data dictionary. This should never happen "
+ "and it may be a bug.",
+ index_info->m_index_dict_version, index_info->m_index_type,
+ index_info->m_kv_version, index_info->m_ttl_duration);
abort_with_stack_traces();
}
@@ -3842,22 +4687,31 @@ bool Rdb_dict_manager::get_index_info(const GL_INDEX_ID &gl_index_id,
bool Rdb_dict_manager::get_cf_flags(const uint32_t &cf_id,
uint32_t *const cf_flags) const {
+ DBUG_ASSERT(cf_flags != nullptr);
+
bool found = false;
std::string value;
uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2] = {0};
+
rdb_netbuf_store_uint32(key_buf, Rdb_key_def::CF_DEFINITION);
rdb_netbuf_store_uint32(key_buf + Rdb_key_def::INDEX_NUMBER_SIZE, cf_id);
- const rocksdb::Slice key = rocksdb::Slice((char *)key_buf, sizeof(key_buf));
+ const rocksdb::Slice key =
+ rocksdb::Slice(reinterpret_cast<char *>(key_buf), sizeof(key_buf));
const rocksdb::Status status = get_value(key, &value);
+
if (status.ok()) {
const uchar *val = (const uchar *)value.c_str();
- uint16_t version = rdb_netbuf_to_uint16(val);
+ DBUG_ASSERT(val);
+
+ const uint16_t version = rdb_netbuf_to_uint16(val);
+
if (version == Rdb_key_def::CF_DEFINITION_VERSION) {
*cf_flags = rdb_netbuf_to_uint32(val + Rdb_key_def::VERSION_SIZE);
found = true;
}
}
+
return found;
}
@@ -4046,13 +4900,6 @@ void Rdb_dict_manager::finish_indexes_operation(
for (const auto &gl_index_id : gl_index_ids) {
if (is_index_operation_ongoing(gl_index_id, dd_type)) {
- // NO_LINT_DEBUG
- sql_print_information("RocksDB: Finished %s (%u,%u)",
- dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING
- ? "filtering dropped index"
- : "index creation",
- gl_index_id.cf_id, gl_index_id.index_id);
-
end_ongoing_index_operation(batch, gl_index_id, dd_type);
/*
@@ -4128,12 +4975,8 @@ void Rdb_dict_manager::log_start_drop_table(
void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id,
const char *log_action) const {
- uint16 m_index_dict_version = 0;
- uchar m_index_type = 0;
- uint16 kv_version = 0;
-
- if (!get_index_info(gl_index_id, &m_index_dict_version, &m_index_type,
- &kv_version)) {
+ struct Rdb_index_info index_info;
+ if (!get_index_info(gl_index_id, &index_info)) {
/*
If we don't find the index info, it could be that it's because it was a
partially created index that isn't in the data dictionary yet that needs
@@ -4151,8 +4994,6 @@ void Rdb_dict_manager::log_start_drop_index(GL_INDEX_ID gl_index_id,
abort_with_stack_traces();
}
}
- sql_print_information("RocksDB: %s filtering dropped index (%u,%u)",
- log_action, gl_index_id.cf_id, gl_index_id.index_id);
}
bool Rdb_dict_manager::get_max_index_id(uint32_t *const index_id) const {
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 2ffd3f6e8db..b54a5960797 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -72,24 +72,24 @@ public:
};
struct Rdb_collation_codec;
+struct Rdb_index_info;
/*
C-style "virtual table" allowing different handling of packing logic based
on the field type. See Rdb_field_packing::setup() implementation.
*/
-using rdb_make_unpack_info_t = void (*)(const Rdb_collation_codec *codec,
- const Field *field,
- Rdb_pack_field_context *pack_ctx);
-using rdb_index_field_unpack_t = int (*)(Rdb_field_packing *fpi, Field *field,
- uchar *field_ptr,
- Rdb_string_reader *reader,
- Rdb_string_reader *unpack_reader);
-using rdb_index_field_skip_t = int (*)(const Rdb_field_packing *fpi,
- const Field *field,
- Rdb_string_reader *reader);
-using rdb_index_field_pack_t = void (*)(Rdb_field_packing *fpi, Field *field,
- uchar *buf, uchar **dst,
- Rdb_pack_field_context *pack_ctx);
+using rdb_make_unpack_info_t =
+ void (Rdb_key_def::*)(const Rdb_collation_codec *codec, const Field *field,
+ Rdb_pack_field_context *pack_ctx) const;
+using rdb_index_field_unpack_t = int (Rdb_key_def::*)(
+ Rdb_field_packing *fpi, Field *field, uchar *field_ptr,
+ Rdb_string_reader *reader, Rdb_string_reader *unpack_reader) const;
+using rdb_index_field_skip_t =
+ int (Rdb_key_def::*)(const Rdb_field_packing *fpi, const Field *field,
+ Rdb_string_reader *reader) const;
+using rdb_index_field_pack_t =
+ void (Rdb_key_def::*)(Rdb_field_packing *fpi, Field *field, uchar *buf,
+ uchar **dst, Rdb_pack_field_context *pack_ctx) const;
const uint RDB_INVALID_KEY_LEN = uint(-1);
@@ -109,14 +109,33 @@ const size_t RDB_CHECKSUM_CHUNK_SIZE = 2 * RDB_CHECKSUM_SIZE + 1;
const char RDB_CHECKSUM_DATA_TAG = 0x01;
/*
- Unpack data is variable length. It is a 1 tag-byte plus a
- two byte length field. The length field includes the header as well.
+ Unpack data is variable length. The header is 1 tag-byte plus a two byte
+ length field. The length field includes the header as well.
*/
const char RDB_UNPACK_DATA_TAG = 0x02;
const size_t RDB_UNPACK_DATA_LEN_SIZE = sizeof(uint16_t);
const size_t RDB_UNPACK_HEADER_SIZE =
sizeof(RDB_UNPACK_DATA_TAG) + RDB_UNPACK_DATA_LEN_SIZE;
+/*
+ This header format is 1 tag-byte plus a two byte length field plus a two byte
+ covered bitmap. The length field includes the header size.
+*/
+const char RDB_UNPACK_COVERED_DATA_TAG = 0x03;
+const size_t RDB_UNPACK_COVERED_DATA_LEN_SIZE = sizeof(uint16_t);
+const size_t RDB_COVERED_BITMAP_SIZE = sizeof(uint16_t);
+const size_t RDB_UNPACK_COVERED_HEADER_SIZE =
+ sizeof(RDB_UNPACK_COVERED_DATA_TAG) + RDB_UNPACK_COVERED_DATA_LEN_SIZE +
+ RDB_COVERED_BITMAP_SIZE;
+
+/*
+ Data dictionary index info field sizes.
+*/
+const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16);
+const size_t RDB_SIZEOF_INDEX_TYPE = sizeof(uchar);
+const size_t RDB_SIZEOF_KV_VERSION = sizeof(uint16);
+const size_t RDB_SIZEOF_INDEX_FLAGS = sizeof(uint32);
+
// Possible return values for rdb_index_field_unpack_t functions.
enum {
UNPACK_SUCCESS = 0,
@@ -170,20 +189,20 @@ public:
uchar *const packed_tuple, const uchar *const key_tuple,
const key_part_map &keypart_map) const;
- uchar *pack_field(Field *const field,
- Rdb_field_packing *pack_info,
- uchar * tuple,
- uchar *const packed_tuple,
- uchar *const pack_buffer,
+ uchar *pack_field(Field *const field, Rdb_field_packing *pack_info,
+ uchar *tuple, uchar *const packed_tuple,
+ uchar *const pack_buffer,
Rdb_string_writer *const unpack_info,
- uint *const n_null_fields) const;
+ uint *const n_null_fields) const;
/* Convert a key from Table->record format to mem-comparable form */
uint pack_record(const TABLE *const tbl, uchar *const pack_buffer,
const uchar *const record, uchar *const packed_tuple,
Rdb_string_writer *const unpack_info,
const bool &should_store_row_debug_checksums,
const longlong &hidden_pk_id = 0, uint n_key_parts = 0,
- uint *const n_null_fields = nullptr) const;
+ uint *const n_null_fields = nullptr,
+ uint *const ttl_pk_offset = nullptr,
+ const char *const ttl_bytes = nullptr) const;
/* Pack the hidden primary key into mem-comparable form. */
uint pack_hidden_pk(const longlong &hidden_pk_id,
uchar *const packed_tuple) const;
@@ -241,6 +260,17 @@ public:
return true;
}
+ void get_lookup_bitmap(const TABLE *table, MY_BITMAP *map) const;
+
+ bool covers_lookup(TABLE *const table,
+ const rocksdb::Slice *const unpack_info,
+ const MY_BITMAP *const map) const;
+
+ inline bool use_covered_bitmap_format() const {
+ return m_index_type == INDEX_TYPE_SECONDARY &&
+ m_kv_format_version >= SECONDARY_FORMAT_VERSION_UPDATE3;
+ }
+
/*
Return true if the passed mem-comparable key
- is from this index, and
@@ -278,6 +308,8 @@ public:
uint get_key_parts() const { return m_key_parts; }
+ uint get_ttl_field_offset() const { return m_ttl_field_offset; }
+
/*
Get a field object for key part #part_no
@@ -296,14 +328,17 @@ public:
return m_prefix_extractor.get();
}
+ static size_t get_unpack_header_size(char tag);
+
Rdb_key_def &operator=(const Rdb_key_def &) = delete;
Rdb_key_def(const Rdb_key_def &k);
Rdb_key_def(uint indexnr_arg, uint keyno_arg,
rocksdb::ColumnFamilyHandle *cf_handle_arg,
uint16_t index_dict_version_arg, uchar index_type_arg,
uint16_t kv_format_version_arg, bool is_reverse_cf_arg,
- bool is_auto_cf_arg, bool is_per_partition_cf, const char *name,
- Rdb_index_stats stats = Rdb_index_stats());
+ bool is_per_partition_cf, const char *name,
+ Rdb_index_stats stats = Rdb_index_stats(), uint32 index_flags = 0,
+ uint32 ttl_rec_offset = UINT_MAX, uint64 ttl_duration = 0);
~Rdb_key_def();
enum {
@@ -317,10 +352,20 @@ public:
// bit flags for combining bools when writing to disk
enum {
REVERSE_CF_FLAG = 1,
- AUTO_CF_FLAG = 2,
+ AUTO_CF_FLAG = 2, // Deprecated
PER_PARTITION_CF_FLAG = 4,
};
+ // bit flags which denote myrocks specific fields stored in the record
+ // currently only used for TTL.
+ enum INDEX_FLAG {
+ TTL_FLAG = 1 << 0,
+
+ // MAX_FLAG marks where the actual record starts
+ // This flag always needs to be set to the last index flag enum.
+ MAX_FLAG = TTL_FLAG << 1,
+ };
+
// Set of flags to ignore when comparing two CF-s and determining if
// they're same.
static const uint CF_FLAGS_TO_IGNORE = PER_PARTITION_CF_FLAG;
@@ -354,7 +399,7 @@ public:
// INDEX_INFO layout. Update INDEX_INFO_VERSION_LATEST to point to the
// latest version number.
enum {
- INDEX_INFO_VERSION_INITIAL = 1, // Obsolete
+ INDEX_INFO_VERSION_INITIAL = 1, // Obsolete
INDEX_INFO_VERSION_KV_FORMAT,
INDEX_INFO_VERSION_GLOBAL_ID,
// There is no change to data format in this version, but this version
@@ -362,8 +407,14 @@ public:
// bump is needed to prevent older binaries from skipping the KV version
// check inadvertently.
INDEX_INFO_VERSION_VERIFY_KV_FORMAT,
+ // This changes the data format to include a 8 byte TTL duration for tables
+ INDEX_INFO_VERSION_TTL,
+ // This changes the data format to include a bitmap before the TTL duration
+ // which will indicate in the future whether TTL or other special fields
+ // are turned on or off.
+ INDEX_INFO_VERSION_FIELD_FLAGS,
// This normally point to the latest (currently it does).
- INDEX_INFO_VERSION_LATEST = INDEX_INFO_VERSION_VERIFY_KV_FORMAT,
+ INDEX_INFO_VERSION_LATEST = INDEX_INFO_VERSION_FIELD_FLAGS,
};
// MyRocks index types
@@ -383,16 +434,71 @@ public:
// it can be decoded from its mem-comparable form)
// - VARCHAR-columns use endspace-padding.
PRIMARY_FORMAT_VERSION_UPDATE1 = 11,
- PRIMARY_FORMAT_VERSION_LATEST = PRIMARY_FORMAT_VERSION_UPDATE1,
+ // This change includes:
+ // - Binary encoded variable length fields have a new format that avoids
+ // an inefficiency where data that was a multiple of 8 bytes in length
+ // had an extra 9 bytes of encoded data.
+ PRIMARY_FORMAT_VERSION_UPDATE2 = 12,
+ // This change includes support for TTL
+ // - This means that when TTL is specified for the table an 8-byte TTL
+ // field is prepended in front of each value.
+ PRIMARY_FORMAT_VERSION_TTL = 13,
+ PRIMARY_FORMAT_VERSION_LATEST = PRIMARY_FORMAT_VERSION_TTL,
SECONDARY_FORMAT_VERSION_INITIAL = 10,
// This change the SK format to include unpack_info.
SECONDARY_FORMAT_VERSION_UPDATE1 = 11,
- SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_UPDATE1,
+ // This change includes:
+ // - Binary encoded variable length fields have a new format that avoids
+ // an inefficiency where data that was a multiple of 8 bytes in length
+ // had an extra 9 bytes of encoded data.
+ SECONDARY_FORMAT_VERSION_UPDATE2 = 12,
+ // This change includes support for TTL
+ // - This means that when TTL is specified for the table an 8-byte TTL
+ // field is prepended in front of each value.
+ SECONDARY_FORMAT_VERSION_TTL = 13,
+ SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_TTL,
+ // This change includes support for covering SK lookups for varchars. A
+ // 2-byte bitmap is added after the tag-byte to unpack_info only for
+ // records which have covered varchar columns. Currently waiting before
+ // enabling in prod.
+ SECONDARY_FORMAT_VERSION_UPDATE3 = 65535,
};
void setup(const TABLE *const table, const Rdb_tbl_def *const tbl_def);
+ static uint extract_ttl_duration(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ uint64 *ttl_duration);
+ static uint extract_ttl_col(const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg,
+ std::string *ttl_column, uint *ttl_field_offset,
+ bool skip_checks = false);
+ inline bool has_ttl() const { return m_ttl_duration > 0; }
+
+ static bool has_index_flag(uint32 index_flags, enum INDEX_FLAG flag);
+ static uint32 calculate_index_flag_offset(uint32 index_flags,
+ enum INDEX_FLAG flag,
+ uint *const field_length = nullptr);
+ void write_index_flag_field(Rdb_string_writer *const buf,
+ const uchar *const val,
+ enum INDEX_FLAG flag) const;
+
+ static const std::string
+ gen_qualifier_for_table(const char *const qualifier,
+ const std::string &partition_name = "");
+ static const std::string
+ gen_cf_name_qualifier_for_partition(const std::string &s);
+ static const std::string
+ gen_ttl_duration_qualifier_for_partition(const std::string &s);
+ static const std::string
+ gen_ttl_col_qualifier_for_partition(const std::string &s);
+
+ static const std::string parse_comment_for_qualifier(
+ const std::string &comment, const TABLE *const table_arg,
+ const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found,
+ const char *const qualifier);
+
rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; }
/* Check if keypart #kp can be unpacked from index tuple */
@@ -410,7 +516,130 @@ public:
or at least sk_min if SK.*/
bool index_format_min_check(const int &pk_min, const int &sk_min) const;
-private:
+ void pack_with_make_sort_key(
+ Rdb_field_packing *const fpi, Field *const field,
+ uchar *buf MY_ATTRIBUTE((__unused__)), uchar **dst,
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const;
+
+ void pack_with_varchar_encoding(
+ Rdb_field_packing *const fpi, Field *const field, uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx MY_ATTRIBUTE((__unused__))) const;
+
+ void
+ pack_with_varchar_space_pad(Rdb_field_packing *const fpi, Field *const field,
+ uchar *buf, uchar **dst,
+ Rdb_pack_field_context *const pack_ctx) const;
+
+ int unpack_integer(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const to, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__))) const;
+
+ int unpack_double(Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const field_ptr, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__))) const;
+
+ int unpack_float(Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const field_ptr, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__))) const;
+
+ int unpack_binary_str(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const to, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__))) const;
+
+ int unpack_binary_or_utf8_varchar(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader MY_ATTRIBUTE((__unused__))) const;
+
+ int unpack_binary_or_utf8_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const;
+
+ int unpack_newdate(Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const field_ptr, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__))) const;
+
+ int unpack_utf8_str(Rdb_field_packing *const fpi, Field *const field,
+ uchar *dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader
+ MY_ATTRIBUTE((__unused__))) const;
+
+ int unpack_unknown_varchar(Rdb_field_packing *const fpi, Field *const field,
+ uchar *dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const;
+
+ int unpack_simple_varchar_space_pad(
+ Rdb_field_packing *const fpi, Field *const field, uchar *dst,
+ Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const;
+
+ int unpack_simple(Rdb_field_packing *const fpi,
+ Field *const field MY_ATTRIBUTE((__unused__)),
+ uchar *const dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const;
+
+ int unpack_unknown(Rdb_field_packing *const fpi, Field *const field,
+ uchar *const dst, Rdb_string_reader *const reader,
+ Rdb_string_reader *const unp_reader) const;
+
+ int unpack_floating_point(uchar *const dst, Rdb_string_reader *const reader,
+ const size_t &size, const int &exp_digit,
+ const uchar *const zero_pattern,
+ const uchar *const zero_val,
+ void (*swap_func)(uchar *, const uchar *)) const;
+
+ void make_unpack_simple_varchar(const Rdb_collation_codec *const codec,
+ const Field *const field,
+ Rdb_pack_field_context *const pack_ctx) const;
+
+ void make_unpack_simple(const Rdb_collation_codec *const codec,
+ const Field *const field,
+ Rdb_pack_field_context *const pack_ctx) const;
+
+ void make_unpack_unknown(
+ const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) const;
+
+ void make_unpack_unknown_varchar(
+ const Rdb_collation_codec *const codec MY_ATTRIBUTE((__unused__)),
+ const Field *const field, Rdb_pack_field_context *const pack_ctx) const;
+
+ void dummy_make_unpack_info(
+ const Rdb_collation_codec *codec MY_ATTRIBUTE((__unused__)),
+ const Field *field MY_ATTRIBUTE((__unused__)),
+ Rdb_pack_field_context *pack_ctx MY_ATTRIBUTE((__unused__))) const;
+
+ int skip_max_length(const Rdb_field_packing *const fpi,
+ const Field *const field MY_ATTRIBUTE((__unused__)),
+ Rdb_string_reader *const reader) const;
+
+ int skip_variable_length(
+ const Rdb_field_packing *const fpi MY_ATTRIBUTE((__unused__)),
+ const Field *const field, Rdb_string_reader *const reader) const;
+
+ int skip_variable_space_pad(const Rdb_field_packing *const fpi,
+ const Field *const field,
+ Rdb_string_reader *const reader) const;
+
+ inline bool use_legacy_varbinary_format() const {
+ return !index_format_min_check(PRIMARY_FORMAT_VERSION_UPDATE2,
+ SECONDARY_FORMAT_VERSION_UPDATE2);
+ }
+
+ static inline bool is_unpack_data_tag(char c) {
+ return c == RDB_UNPACK_DATA_TAG || c == RDB_UNPACK_COVERED_DATA_TAG;
+ }
+
+ private:
#ifndef DBUG_OFF
inline bool is_storage_available(const int &offset, const int &needed) const {
const int storage_length = static_cast<int>(max_storage_fmt_length());
@@ -425,7 +654,17 @@ private:
rocksdb::ColumnFamilyHandle *m_cf_handle;
-public:
+ void pack_legacy_variable_format(const uchar *src, size_t src_len,
+ uchar **dst) const;
+
+ void pack_variable_format(const uchar *src, size_t src_len,
+ uchar **dst) const;
+
+ uint calc_unpack_legacy_variable_format(uchar flag, bool *done) const;
+
+ uint calc_unpack_variable_format(uchar flag, bool *done) const;
+
+ public:
uint16_t m_index_dict_version;
uchar m_index_type;
/* KV format version for the index id */
@@ -433,15 +672,35 @@ public:
/* If true, the column family stores data in the reverse order */
bool m_is_reverse_cf;
- bool m_is_auto_cf;
-
/* If true, then column family is created per partition. */
bool m_is_per_partition_cf;
std::string m_name;
mutable Rdb_index_stats m_stats;
-private:
+ /*
+ Bitmap containing information about whether TTL or other special fields
+ are enabled for the given index.
+ */
+ uint32 m_index_flags_bitmap;
+
+ /*
+ How much space in bytes the index flag fields occupy.
+ */
+ uint32 m_total_index_flags_length;
+
+ /*
+ Offset in the records where the 8-byte TTL is stored (UINT_MAX if no TTL)
+ */
+ uint32 m_ttl_rec_offset;
+
+ /* Default TTL duration */
+ uint64 m_ttl_duration;
+
+ /* TTL column (if defined by user, otherwise implicit TTL is used) */
+ std::string m_ttl_column;
+
+ private:
friend class Rdb_tbl_def; // for m_index_number above
/* Number of key parts in the primary key*/
@@ -464,6 +723,18 @@ private:
*/
uint m_key_parts;
+ /*
+ If TTL column is part of the PK, offset of the column within pk.
+ Default is UINT_MAX to denote that TTL col is not part of PK.
+ */
+ uint m_ttl_pk_key_part_offset;
+
+ /*
+ Index of the TTL column in table->s->fields, if it exists.
+ Default is UINT_MAX to denote that it does not exist.
+ */
+ uint m_ttl_field_offset;
+
/* Prefix extractor for the column family of the key definiton */
std::shared_ptr<const rocksdb::SliceTransform> m_prefix_extractor;
@@ -536,6 +807,13 @@ public:
// spaces in the upack_info
bool m_unpack_info_uses_two_bytes;
+ /*
+ True implies that an index-only read is always possible for this field.
+ False means an index-only read may be possible depending on the record and
+ field type.
+ */
+ bool m_covered;
+
const std::vector<uchar> *space_xfrm;
size_t space_xfrm_len;
size_t space_mb_len;
@@ -812,6 +1090,8 @@ public:
return m_sequence.get_and_update_next_number(dict);
}
+ const std::string safe_get_table_name(const GL_INDEX_ID &gl_index_id);
+
/* Walk the data dictionary */
int scan_for_tables(Rdb_tables_scanner *tables_scanner);
@@ -895,12 +1175,13 @@ private:
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
- value: version, index_type, kv_format_version
+ value: version, index_type, kv_format_version, ttl_duration
index_type is 1 byte, version and kv_format_version are 2 bytes.
+ ttl_duration is 8 bytes.
3. CF id => CF flags
key: Rdb_key_def::CF_DEFINITION(0x3) + cf_id
- value: version, {is_reverse_cf, is_auto_cf, is_per_partition_cf}
+ value: version, {is_reverse_cf, is_auto_cf (deprecated), is_per_partition_cf}
cf_flags is 4 bytes in total.
4. Binlog entry (updated at commit)
@@ -967,6 +1248,10 @@ public:
inline void unlock() { RDB_MUTEX_UNLOCK_CHECK(m_mutex); }
+ inline rocksdb::ColumnFamilyHandle *get_system_cf() const {
+ return m_system_cfh;
+ }
+
/* Raw RocksDB operations */
std::unique_ptr<rocksdb::WriteBatch> begin() const;
int commit(rocksdb::WriteBatch *const batch, const bool &sync = true) const;
@@ -979,16 +1264,13 @@ public:
rocksdb::Iterator *new_iterator() const;
/* Internal Index id => CF */
- void add_or_update_index_cf_mapping(rocksdb::WriteBatch *batch,
- const uchar index_type,
- const uint16_t kv_version,
- const uint index_id,
- const uint cf_id) const;
+ void
+ add_or_update_index_cf_mapping(rocksdb::WriteBatch *batch,
+ struct Rdb_index_info *const index_info) const;
void delete_index_info(rocksdb::WriteBatch *batch,
const GL_INDEX_ID &index_id) const;
bool get_index_info(const GL_INDEX_ID &gl_index_id,
- uint16_t *index_dict_version, uchar *index_type,
- uint16_t *kv_version) const;
+ struct Rdb_index_info *const index_info) const;
/* CF id => CF flags */
void add_cf_flags(rocksdb::WriteBatch *const batch, const uint &cf_id,
@@ -1066,4 +1348,13 @@ public:
Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const;
};
+struct Rdb_index_info {
+ GL_INDEX_ID m_gl_index_id;
+ uint16_t m_index_dict_version = 0;
+ uchar m_index_type = 0;
+ uint16_t m_kv_version = 0;
+ uint32 m_index_flags = 0;
+ uint64 m_ttl_duration = 0;
+};
+
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc
index ca6fc914d4a..1e32fc352db 100644
--- a/storage/rocksdb/rdb_i_s.cc
+++ b/storage/rocksdb/rdb_i_s.cc
@@ -17,6 +17,8 @@
#include <my_config.h>
/* C++ standard header files */
+#include <algorithm>
+#include <cctype>
#include <map>
#include <string>
#include <vector>
@@ -39,6 +41,7 @@
#include "./rdb_cf_manager.h"
#include "./rdb_datadic.h"
#include "./rdb_utils.h"
+#include "./rdb_mariadb_server_port.h"
namespace myrocks {
@@ -71,7 +74,11 @@ static int rdb_i_s_cfstats_fill_table(
my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
DBUG_ENTER_FUNC();
- bool ret;
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ int ret = 0;
uint64_t val;
const std::vector<std::pair<const std::string, std::string>> cf_properties = {
@@ -93,26 +100,24 @@ static int rdb_i_s_cfstats_fill_table(
{rocksdb::DB::Properties::kNumLiveVersions, "NUM_LIVE_VERSIONS"}};
rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
- DBUG_ASSERT(rdb != nullptr);
for (const auto &cf_name : cf_manager.get_cf_names()) {
- rocksdb::ColumnFamilyHandle *cfh;
- bool is_automatic;
-
- /*
- Only the cf name is important. Whether it was generated automatically
- does not matter, so is_automatic is ignored.
- */
- cfh = cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic);
- if (cfh == nullptr)
+ DBUG_ASSERT(!cf_name.empty());
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+ if (cfh == nullptr) {
continue;
+ }
for (const auto &property : cf_properties) {
- if (!rdb->GetIntProperty(cfh, property.first, &val))
+ if (!rdb->GetIntProperty(cfh, property.first, &val)) {
continue;
-
- DBUG_ASSERT(tables != nullptr);
+ }
tables->table->field[RDB_CFSTATS_FIELD::CF_NAME]->store(
cf_name.c_str(), cf_name.size(), system_charset_info);
@@ -120,12 +125,15 @@ static int rdb_i_s_cfstats_fill_table(
property.second.c_str(), property.second.size(), system_charset_info);
tables->table->field[RDB_CFSTATS_FIELD::VALUE]->store(val, true);
- ret = my_core::schema_table_store_record(thd, tables->table);
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
- if (ret)
+ if (ret) {
DBUG_RETURN(ret);
+ }
}
}
+
DBUG_RETURN(0);
}
@@ -161,7 +169,11 @@ static int rdb_i_s_dbstats_fill_table(
my_core::Item *const cond MY_ATTRIBUTE((__unused__))) {
DBUG_ENTER_FUNC();
- bool ret;
+ DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
+
+ int ret = 0;
uint64_t val;
const std::vector<std::pair<std::string, std::string>> db_properties = {
@@ -171,23 +183,29 @@ static int rdb_i_s_dbstats_fill_table(
"DB_OLDEST_SNAPSHOT_TIME"}};
rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
const rocksdb::BlockBasedTableOptions &table_options =
rdb_get_table_options();
for (const auto &property : db_properties) {
- if (!rdb->GetIntProperty(property.first, &val))
+ if (!rdb->GetIntProperty(property.first, &val)) {
continue;
-
- DBUG_ASSERT(tables != nullptr);
+ }
tables->table->field[RDB_DBSTATS_FIELD::STAT_TYPE]->store(
property.second.c_str(), property.second.size(), system_charset_info);
tables->table->field[RDB_DBSTATS_FIELD::VALUE]->store(val, true);
- ret = my_core::schema_table_store_record(thd, tables->table);
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
- if (ret)
+ if (ret) {
DBUG_RETURN(ret);
+ }
}
/*
@@ -201,11 +219,13 @@ static int rdb_i_s_dbstats_fill_table(
information from the column family.
*/
val = (table_options.block_cache ? table_options.block_cache->GetUsage() : 0);
+
tables->table->field[RDB_DBSTATS_FIELD::STAT_TYPE]->store(
STRING_WITH_LEN("DB_BLOCK_CACHE_USAGE"), system_charset_info);
tables->table->field[RDB_DBSTATS_FIELD::VALUE]->store(val, true);
- ret = my_core::schema_table_store_record(thd, tables->table);
+ ret =
+ static_cast<int>(my_core::schema_table_store_record(thd, tables->table));
DBUG_RETURN(ret);
}
@@ -248,17 +268,28 @@ static int rdb_i_s_perf_context_fill_table(
DBUG_ASSERT(thd != nullptr);
DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
int ret = 0;
Field **field = tables->table->field;
+ DBUG_ASSERT(field != nullptr);
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
const std::vector<std::string> tablenames = rdb_get_open_table_names();
+
for (const auto &it : tablenames) {
std::string str, dbname, tablename, partname;
Rdb_perf_counters counters;
- if (rdb_normalize_tablename(it, &str)) {
- DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ int rc = rdb_normalize_tablename(it, &str);
+
+ if (rc != HA_EXIT_SUCCESS) {
+ DBUG_RETURN(rc);
}
if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) {
@@ -269,12 +300,11 @@ static int rdb_i_s_perf_context_fill_table(
continue;
}
- DBUG_ASSERT(field != nullptr);
-
field[RDB_PERF_CONTEXT_FIELD::TABLE_SCHEMA]->store(
dbname.c_str(), dbname.size(), system_charset_info);
field[RDB_PERF_CONTEXT_FIELD::TABLE_NAME]->store(
tablename.c_str(), tablename.size(), system_charset_info);
+
if (partname.size() == 0) {
field[RDB_PERF_CONTEXT_FIELD::PARTITION_NAME]->set_null();
} else {
@@ -289,9 +319,12 @@ static int rdb_i_s_perf_context_fill_table(
system_charset_info);
field[RDB_PERF_CONTEXT_FIELD::VALUE]->store(counters.m_value[i], true);
- ret = my_core::schema_table_store_record(thd, tables->table);
- if (ret)
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret) {
DBUG_RETURN(ret);
+ }
}
}
@@ -332,26 +365,34 @@ static int rdb_i_s_perf_context_global_fill_table(
DBUG_ASSERT(thd != nullptr);
DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
// Get a copy of the global perf counters.
Rdb_perf_counters global_counters;
rdb_get_global_perf_counters(&global_counters);
for (int i = 0; i < PC_MAX_IDX; i++) {
- DBUG_ASSERT(tables->table != nullptr);
- DBUG_ASSERT(tables->table->field != nullptr);
-
tables->table->field[RDB_PERF_CONTEXT_GLOBAL_FIELD::STAT_TYPE]->store(
rdb_pc_stat_types[i].c_str(), rdb_pc_stat_types[i].size(),
system_charset_info);
tables->table->field[RDB_PERF_CONTEXT_GLOBAL_FIELD::VALUE]->store(
global_counters.m_value[i], true);
- ret = my_core::schema_table_store_record(thd, tables->table);
- if (ret)
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
+ if (ret) {
DBUG_RETURN(ret);
+ }
}
DBUG_RETURN(0);
@@ -393,13 +434,21 @@ static int rdb_i_s_cfoptions_fill_table(
DBUG_ASSERT(thd != nullptr);
DBUG_ASSERT(tables != nullptr);
- bool ret;
+ int ret = 0;
+
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
for (const auto &cf_name : cf_manager.get_cf_names()) {
std::string val;
rocksdb::ColumnFamilyOptions opts;
+
+ DBUG_ASSERT(!cf_name.empty());
cf_manager.get_cf_options(cf_name, &opts);
std::vector<std::pair<std::string, std::string>> cf_option_types = {
@@ -469,29 +518,37 @@ static int rdb_i_s_cfoptions_fill_table(
// get MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL option value
val = opts.max_bytes_for_level_multiplier_additional.empty() ? "NULL" : "";
+
for (const auto &level : opts.max_bytes_for_level_multiplier_additional) {
val.append(std::to_string(level) + ":");
}
+
val.pop_back();
cf_option_types.push_back(
{"MAX_BYTES_FOR_LEVEL_MULTIPLIER_ADDITIONAL", val});
// get COMPRESSION_TYPE option value
GetStringFromCompressionType(&val, opts.compression);
+
if (val.empty()) {
val = "NULL";
}
+
cf_option_types.push_back({"COMPRESSION_TYPE", val});
// get COMPRESSION_PER_LEVEL option value
val = opts.compression_per_level.empty() ? "NULL" : "";
+
for (const auto &compression_type : opts.compression_per_level) {
std::string res;
+
GetStringFromCompressionType(&res, compression_type);
+
if (!res.empty()) {
val.append(res + ":");
}
}
+
val.pop_back();
cf_option_types.push_back({"COMPRESSION_PER_LEVEL", val});
@@ -499,12 +556,15 @@ static int rdb_i_s_cfoptions_fill_table(
val = std::to_string(opts.compression_opts.window_bits) + ":";
val.append(std::to_string(opts.compression_opts.level) + ":");
val.append(std::to_string(opts.compression_opts.strategy));
+
cf_option_types.push_back({"COMPRESSION_OPTS", val});
// bottommost_compression
if (opts.bottommost_compression) {
std::string res;
+
GetStringFromCompressionType(&res, opts.bottommost_compression);
+
if (!res.empty()) {
cf_option_types.push_back({"BOTTOMMOST_COMPRESSION", res});
}
@@ -533,12 +593,15 @@ static int rdb_i_s_cfoptions_fill_table(
default:
val = "NULL";
}
+
cf_option_types.push_back({"COMPACTION_STYLE", val});
// get COMPACTION_OPTIONS_UNIVERSAL related options
const rocksdb::CompactionOptionsUniversal compac_opts =
opts.compaction_options_universal;
+
val = "{SIZE_RATIO=";
+
val.append(std::to_string(compac_opts.size_ratio));
val.append("; MIN_MERGE_WIDTH=");
val.append(std::to_string(compac_opts.min_merge_width));
@@ -549,6 +612,7 @@ static int rdb_i_s_cfoptions_fill_table(
val.append("; COMPRESSION_SIZE_PERCENT=");
val.append(std::to_string(compac_opts.compression_size_percent));
val.append("; STOP_STYLE=");
+
switch (compac_opts.stop_style) {
case rocksdb::kCompactionStopStyleSimilarSize:
val.append("kCompactionStopStyleSimilarSize}");
@@ -559,6 +623,7 @@ static int rdb_i_s_cfoptions_fill_table(
default:
val.append("}");
}
+
cf_option_types.push_back({"COMPACTION_OPTIONS_UNIVERSAL", val});
// get COMPACTION_OPTION_FIFO option
@@ -566,96 +631,24 @@ static int rdb_i_s_cfoptions_fill_table(
{"COMPACTION_OPTION_FIFO::MAX_TABLE_FILES_SIZE",
std::to_string(opts.compaction_options_fifo.max_table_files_size)});
- // get block-based table related options
- const rocksdb::BlockBasedTableOptions &table_options =
- rdb_get_table_options();
+ // get table related options
+ std::vector<std::string> table_options =
+ split_into_vector(opts.table_factory->GetPrintableTableOptions(), '\n');
- // get BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS option
- cf_option_types.push_back(
- {"BLOCK_BASED_TABLE_FACTORY::CACHE_INDEX_AND_FILTER_BLOCKS",
- table_options.cache_index_and_filter_blocks ? "1" : "0"});
+ for (auto option : table_options) {
+ option.erase(std::remove(option.begin(), option.end(), ' '),
+ option.end());
- // get BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE option value
- switch (table_options.index_type) {
- case rocksdb::BlockBasedTableOptions::kBinarySearch:
- val = "kBinarySearch";
- break;
- case rocksdb::BlockBasedTableOptions::kHashSearch:
- val = "kHashSearch";
- break;
- default:
- val = "NULL";
- }
- cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::INDEX_TYPE", val});
-
- // get BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION option value
- cf_option_types.push_back(
- {"BLOCK_BASED_TABLE_FACTORY::HASH_INDEX_ALLOW_COLLISION",
- table_options.hash_index_allow_collision ? "ON" : "OFF"});
+ int pos = option.find(":");
+ std::string option_name = option.substr(0, pos);
+ std::string option_value = option.substr(pos + 1, option.length());
+ std::transform(option_name.begin(), option_name.end(),
+ option_name.begin(),
+ [](unsigned char c) { return std::toupper(c); });
- // get BLOCK_BASED_TABLE_FACTORY::CHECKSUM option value
- switch (table_options.checksum) {
- case rocksdb::kNoChecksum:
- val = "kNoChecksum";
- break;
- case rocksdb::kCRC32c:
- val = "kCRC32c";
- break;
- case rocksdb::kxxHash:
- val = "kxxHash";
- break;
- default:
- val = "NULL";
+ cf_option_types.push_back(
+ {"TABLE_FACTORY::" + option_name, option_value});
}
- cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::CHECKSUM", val});
-
- // get BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE option value
- cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::NO_BLOCK_CACHE",
- table_options.no_block_cache ? "ON" : "OFF"});
-
- // get BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY option
- cf_option_types.push_back(
- {"BLOCK_BASED_TABLE_FACTORY::FILTER_POLICY",
- table_options.filter_policy == nullptr
- ? "NULL"
- : std::string(table_options.filter_policy->Name())});
-
- // get BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING option
- cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::WHOLE_KEY_FILTERING",
- table_options.whole_key_filtering ? "1" : "0"});
-
- // get BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE option
- cf_option_types.push_back(
- {"BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE",
- table_options.block_cache == nullptr
- ? "NULL"
- : std::to_string(table_options.block_cache->GetUsage())});
-
- // get BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED option
- cf_option_types.push_back(
- {"BLOCK_BASED_TABLE_FACTORY::BLOCK_CACHE_COMPRESSED",
- table_options.block_cache_compressed == nullptr
- ? "NULL"
- : std::to_string(
- table_options.block_cache_compressed->GetUsage())});
-
- // get BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE option
- cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE",
- std::to_string(table_options.block_size)});
-
- // get BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION option
- cf_option_types.push_back(
- {"BLOCK_BASED_TABLE_FACTORY::BLOCK_SIZE_DEVIATION",
- std::to_string(table_options.block_size_deviation)});
-
- // get BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL option
- cf_option_types.push_back(
- {"BLOCK_BASED_TABLE_FACTORY::BLOCK_RESTART_INTERVAL",
- std::to_string(table_options.block_restart_interval)});
-
- // get BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION option
- cf_option_types.push_back({"BLOCK_BASED_TABLE_FACTORY::FORMAT_VERSION",
- std::to_string(table_options.format_version)});
for (const auto &cf_option_type : cf_option_types) {
DBUG_ASSERT(tables->table != nullptr);
@@ -670,12 +663,15 @@ static int rdb_i_s_cfoptions_fill_table(
cf_option_type.second.c_str(), cf_option_type.second.size(),
system_charset_info);
- ret = my_core::schema_table_store_record(thd, tables->table);
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
- if (ret)
+ if (ret) {
DBUG_RETURN(ret);
+ }
}
}
+
DBUG_RETURN(0);
}
@@ -736,6 +732,12 @@ static int rdb_i_s_global_info_fill_table(
int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
/* binlog info */
Rdb_binlog_manager *const blm = rdb_get_binlog_manager();
DBUG_ASSERT(blm != nullptr);
@@ -747,6 +749,7 @@ static int rdb_i_s_global_info_fill_table(
if (blm->read(file_buf, &pos, gtid_buf)) {
snprintf(pos_buf, INT_BUF_LEN, "%llu", (ulonglong)pos);
+
ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "FILE", file_buf);
ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "POS", pos_buf);
ret |= rdb_global_info_fill_row(thd, tables, "BINLOG", "GTID", gtid_buf);
@@ -761,6 +764,7 @@ static int rdb_i_s_global_info_fill_table(
if (dict_manager->get_max_index_id(&max_index_id)) {
snprintf(max_index_id_buf, INT_BUF_LEN, "%u", max_index_id);
+
ret |= rdb_global_info_fill_row(thd, tables, "MAX_INDEX_ID", "MAX_INDEX_ID",
max_index_id_buf);
}
@@ -769,17 +773,31 @@ static int rdb_i_s_global_info_fill_table(
char cf_id_buf[INT_BUF_LEN] = {0};
char cf_value_buf[FN_REFLEN + 1] = {0};
const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
for (const auto &cf_handle : cf_manager.get_all_cf()) {
+ DBUG_ASSERT(cf_handle != nullptr);
+
uint flags;
- dict_manager->get_cf_flags(cf_handle->GetID(), &flags);
+
+ if (!dict_manager->get_cf_flags(cf_handle->GetID(), &flags)) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Failed to get column family flags "
+ "from CF with id = %u. MyRocks data dictionary may "
+ "be corrupted.",
+ cf_handle->GetID());
+ abort_with_stack_traces();
+ }
+
snprintf(cf_id_buf, INT_BUF_LEN, "%u", cf_handle->GetID());
snprintf(cf_value_buf, FN_REFLEN, "%s [%u]", cf_handle->GetName().c_str(),
flags);
+
ret |= rdb_global_info_fill_row(thd, tables, "CF_FLAGS", cf_id_buf,
cf_value_buf);
- if (ret)
+ if (ret) {
break;
+ }
}
/* DDL_DROP_INDEX_ONGOING */
@@ -787,14 +805,17 @@ static int rdb_i_s_global_info_fill_table(
dict_manager->get_ongoing_index_operation(
&gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING);
char cf_id_index_buf[CF_ID_INDEX_BUF_LEN] = {0};
+
for (auto gl_index_id : gl_index_ids) {
snprintf(cf_id_index_buf, CF_ID_INDEX_BUF_LEN, "cf_id:%u,index_id:%u",
gl_index_id.cf_id, gl_index_id.index_id);
+
ret |= rdb_global_info_fill_row(thd, tables, "DDL_DROP_INDEX_ONGOING",
cf_id_index_buf, "");
- if (ret)
+ if (ret) {
break;
+ }
}
DBUG_RETURN(ret);
@@ -812,43 +833,50 @@ static int rdb_i_s_compact_stats_fill_table(
DBUG_ENTER_FUNC();
int ret = 0;
-
rocksdb::DB *rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
+
Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
- DBUG_ASSERT(rdb != nullptr);
for (auto cf_name : cf_manager.get_cf_names()) {
- rocksdb::ColumnFamilyHandle *cfh;
- bool is_automatic;
- /*
- Only the cf name is important. Whether it was generated automatically
- does not matter, so is_automatic is ignored.
- */
- cfh = cf_manager.get_cf(cf_name.c_str(), "", nullptr, &is_automatic);
+ rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+
if (cfh == nullptr) {
continue;
}
- std::map<std::string, double> props;
+
+ std::map<std::string, std::string> props;
bool bool_ret MY_ATTRIBUTE((__unused__));
bool_ret = rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props);
DBUG_ASSERT(bool_ret);
+ const std::string prop_name_prefix = "compaction.";
for (auto const &prop_ent : props) {
std::string prop_name = prop_ent.first;
- double value = prop_ent.second;
- std::size_t del_pos = prop_name.find('.');
+ if (prop_name.find(prop_name_prefix) != 0) {
+ continue;
+ }
+ std::string value = prop_ent.second;
+ std::size_t del_pos = prop_name.find('.', prop_name_prefix.size());
DBUG_ASSERT(del_pos != std::string::npos);
- std::string level_str = prop_name.substr(0, del_pos);
+ std::string level_str = prop_name.substr(
+ prop_name_prefix.size(), del_pos - prop_name_prefix.size());
std::string type_str = prop_name.substr(del_pos + 1);
Field **field = tables->table->field;
DBUG_ASSERT(field != nullptr);
+
field[0]->store(cf_name.c_str(), cf_name.size(), system_charset_info);
field[1]->store(level_str.c_str(), level_str.size(), system_charset_info);
field[2]->store(type_str.c_str(), type_str.size(), system_charset_info);
- field[3]->store(value, true);
+ field[3]->store(std::stod(value));
+
+ ret |= static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
- ret |= my_core::schema_table_store_record(thd, tables->table);
if (ret != 0) {
DBUG_RETURN(ret);
}
@@ -963,14 +991,24 @@ static int rdb_i_s_ddl_fill_table(my_core::THD *const thd,
DBUG_ASSERT(thd != nullptr);
DBUG_ASSERT(tables != nullptr);
+ DBUG_ASSERT(tables->table != nullptr);
+
+ int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
Rdb_ddl_scanner ddl_arg;
+
ddl_arg.m_thd = thd;
ddl_arg.m_table = tables->table;
Rdb_ddl_manager *ddl_manager = rdb_get_ddl_manager();
DBUG_ASSERT(ddl_manager != nullptr);
- int ret = ddl_manager->scan_for_tables(&ddl_arg);
+
+ ret = ddl_manager->scan_for_tables(&ddl_arg);
DBUG_RETURN(ret);
}
@@ -1105,14 +1143,19 @@ static int rdb_i_s_index_file_map_fill_table(
/* Iterate over all the column families */
rocksdb::DB *const rdb = rdb_get_rocksdb_db();
- DBUG_ASSERT(rdb != nullptr);
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
const Rdb_cf_manager &cf_manager = rdb_get_cf_manager();
+
for (const auto &cf_handle : cf_manager.get_all_cf()) {
/* Grab the the properties of all the tables in the column family */
rocksdb::TablePropertiesCollection table_props_collection;
const rocksdb::Status s =
rdb->GetPropertiesOfAllTables(cf_handle, &table_props_collection);
+
if (!s.ok()) {
continue;
}
@@ -1122,12 +1165,14 @@ static int rdb_i_s_index_file_map_fill_table(
for (const auto &props : table_props_collection) {
/* Add the SST name into the output */
const std::string sst_name = rdb_filename_without_path(props.first);
+
field[RDB_INDEX_FILE_MAP_FIELD::SST_NAME]->store(
sst_name.data(), sst_name.size(), system_charset_info);
/* Get the __indexstats__ data out of the table property */
std::vector<Rdb_index_stats> stats;
Rdb_tbl_prop_coll::read_stats_from_tbl_props(props.second, &stats);
+
if (stats.empty()) {
field[RDB_INDEX_FILE_MAP_FIELD::COLUMN_FAMILY]->store(-1, true);
field[RDB_INDEX_FILE_MAP_FIELD::INDEX_NUMBER]->store(-1, true);
@@ -1138,7 +1183,7 @@ static int rdb_i_s_index_file_map_fill_table(
field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_MERGES]->store(-1, true);
field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_OTHERS]->store(-1, true);
} else {
- for (auto it : stats) {
+ for (const auto &it : stats) {
/* Add the index number, the number of rows, and data size to the
* output */
field[RDB_INDEX_FILE_MAP_FIELD::COLUMN_FAMILY]->store(
@@ -1156,12 +1201,14 @@ static int rdb_i_s_index_file_map_fill_table(
it.m_entry_merges, true);
field[RDB_INDEX_FILE_MAP_FIELD::ENTRY_OTHERS]->store(
it.m_entry_others, true);
+
std::string distinct_keys_prefix;
for (size_t i = 0; i < it.m_distinct_keys_per_prefix.size(); i++) {
if (i > 0) {
distinct_keys_prefix += ",";
}
+
distinct_keys_prefix +=
std::to_string(it.m_distinct_keys_per_prefix[i]);
}
@@ -1171,7 +1218,9 @@ static int rdb_i_s_index_file_map_fill_table(
system_charset_info);
/* Tell MySQL about this row in the virtual table */
- ret = my_core::schema_table_store_record(thd, tables->table);
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
if (ret != 0) {
break;
}
@@ -1223,11 +1272,15 @@ static int rdb_i_s_lock_info_fill_table(
DBUG_ASSERT(thd != nullptr);
DBUG_ASSERT(tables != nullptr);
DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
int ret = 0;
rocksdb::TransactionDB *const rdb = rdb_get_rocksdb_db();
- DBUG_ASSERT(rdb != nullptr);
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
/* cf id -> rocksdb::KeyLockInfo */
std::unordered_multimap<uint32_t, rocksdb::KeyLockInfo> lock_info =
@@ -1250,12 +1303,15 @@ static int rdb_i_s_lock_info_fill_table(
key_lock_info.exclusive ? "X" : "S", 1, system_charset_info);
/* Tell MySQL about this row in the virtual table */
- ret = my_core::schema_table_store_record(thd, tables->table);
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
if (ret != 0) {
break;
}
}
}
+
DBUG_RETURN(ret);
}
@@ -1330,8 +1386,14 @@ static int rdb_i_s_trx_info_fill_table(
DBUG_ASSERT(thd != nullptr);
DBUG_ASSERT(tables != nullptr);
DBUG_ASSERT(tables->table != nullptr);
+ DBUG_ASSERT(tables->table->field != nullptr);
int ret = 0;
+ rocksdb::DB *const rdb = rdb_get_rocksdb_db();
+
+ if (!rdb) {
+ DBUG_RETURN(ret);
+ }
const std::vector<Rdb_trx_info> &all_trx_info = rdb_get_all_trx_info();
@@ -1340,6 +1402,7 @@ static int rdb_i_s_trx_info_fill_table(
rdb_hexdump(info.name.c_str(), info.name.length(), NAME_LEN);
auto key_hexstr = rdb_hexdump(info.waiting_key.c_str(),
info.waiting_key.length(), FN_REFLEN);
+
tables->table->field[RDB_TRX_FIELD::TRANSACTION_ID]->store(info.trx_id,
true);
tables->table->field[RDB_TRX_FIELD::STATE]->store(
@@ -1371,7 +1434,9 @@ static int rdb_i_s_trx_info_fill_table(
info.query_str.c_str(), info.query_str.length(), system_charset_info);
/* Tell MySQL about this row in the virtual table */
- ret = my_core::schema_table_store_record(thd, tables->table);
+ ret = static_cast<int>(
+ my_core::schema_table_store_record(thd, tables->table));
+
if (ret != 0) {
break;
}
diff --git a/storage/rocksdb/rdb_index_merge.cc b/storage/rocksdb/rdb_index_merge.cc
index b2bab1f4a18..44cec7c2a9c 100644
--- a/storage/rocksdb/rdb_index_merge.cc
+++ b/storage/rocksdb/rdb_index_merge.cc
@@ -31,17 +31,38 @@ namespace myrocks {
Rdb_index_merge::Rdb_index_merge(const char *const tmpfile_path,
const ulonglong &merge_buf_size,
const ulonglong &merge_combine_read_size,
- const rocksdb::Comparator *const comparator)
+ const ulonglong &merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf)
: m_tmpfile_path(tmpfile_path), m_merge_buf_size(merge_buf_size),
m_merge_combine_read_size(merge_combine_read_size),
- m_comparator(comparator), m_rec_buf_unsorted(nullptr),
- m_output_buf(nullptr) {}
+ m_merge_tmp_file_removal_delay(merge_tmp_file_removal_delay),
+ m_cf_handle(cf), m_rec_buf_unsorted(nullptr), m_output_buf(nullptr) {}
Rdb_index_merge::~Rdb_index_merge() {
/*
- Close tmp file, we don't need to worry about deletion, mysql handles it.
+ If merge_tmp_file_removal_delay is set, sleep between calls to chsize.
+
+ This helps mitigate potential trim stalls on flash when large files are
+ being deleted too quickly.
+ */
+ if (m_merge_tmp_file_removal_delay > 0) {
+ uint64 curr_size = m_merge_buf_size * m_merge_file.m_num_sort_buffers;
+ for (uint i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ if (my_chsize(m_merge_file.m_fd, curr_size, 0, MYF(MY_WME))) {
+ // NO_LINT_DEBUG
+ sql_print_error("Error truncating file during fast index creation.");
+ }
+
+ my_sleep(m_merge_tmp_file_removal_delay * 1000);
+ curr_size -= m_merge_buf_size;
+ }
+ }
+
+ /*
+ Close file descriptor, we don't need to worry about deletion,
+ mysql handles it.
*/
- my_close(m_merge_file.fd, MYF(MY_WME));
+ my_close(m_merge_file.m_fd, MYF(MY_WME));
}
int Rdb_index_merge::init() {
@@ -50,7 +71,7 @@ int Rdb_index_merge::init() {
inplace index creation.
*/
if (merge_file_create()) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/*
@@ -75,7 +96,7 @@ int Rdb_index_merge::init() {
Create a merge file in the given location.
*/
int Rdb_index_merge::merge_file_create() {
- DBUG_ASSERT(m_merge_file.fd == -1);
+ DBUG_ASSERT(m_merge_file.m_fd == -1);
int fd;
#ifdef MARIAROCKS_NOT_YET // mysql_tmpfile_path use
@@ -89,11 +110,13 @@ int Rdb_index_merge::merge_file_create() {
fd = mysql_tmpfile("myrocks");
#endif
if (fd < 0) {
- return HA_ERR_INTERNAL_ERROR;
+ // NO_LINT_DEBUG
+ sql_print_error("Failed to create temp file during fast index creation.");
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
- m_merge_file.fd = fd;
- m_merge_file.num_sort_buffers = 0;
+ m_merge_file.m_fd = fd;
+ m_merge_file.m_num_sort_buffers = 0;
return HA_EXIT_SUCCESS;
}
@@ -114,10 +137,10 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
out to disk in sorted order using offset tree.
*/
const uint total_offset = RDB_MERGE_CHUNK_LEN +
- m_rec_buf_unsorted->curr_offset +
+ m_rec_buf_unsorted->m_curr_offset +
RDB_MERGE_KEY_DELIMITER + RDB_MERGE_VAL_DELIMITER +
key.size() + val.size();
- if (total_offset >= m_rec_buf_unsorted->total_size) {
+ if (total_offset >= m_rec_buf_unsorted->m_total_size) {
/*
If the offset tree is empty here, that means that the proposed key to
add is too large for the buffer.
@@ -126,17 +149,17 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
// NO_LINT_DEBUG
sql_print_error("Sort buffer size is too small to process merge. "
"Please set merge buffer size to a higher value.");
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
if (merge_buf_write()) {
// NO_LINT_DEBUG
sql_print_error("Error writing sort buffer to disk.");
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
}
- const ulonglong rec_offset = m_rec_buf_unsorted->curr_offset;
+ const ulonglong rec_offset = m_rec_buf_unsorted->m_curr_offset;
/*
Store key and value in temporary unsorted in memory buffer pointed to by
@@ -145,8 +168,15 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
m_rec_buf_unsorted->store_key_value(key, val);
/* Find sort order of the new record */
- m_offset_tree.emplace(m_rec_buf_unsorted->block.get() + rec_offset,
- m_comparator);
+ auto res =
+ m_offset_tree.emplace(m_rec_buf_unsorted->m_block.get() + rec_offset,
+ m_cf_handle->GetComparator());
+ if (!res.second) {
+ my_printf_error(ER_DUP_ENTRY,
+ "Failed to insert the record: the key already exists",
+ MYF(0));
+ return ER_DUP_ENTRY;
+ }
return HA_EXIT_SUCCESS;
}
@@ -155,33 +185,33 @@ int Rdb_index_merge::add(const rocksdb::Slice &key, const rocksdb::Slice &val) {
Sort + write merge buffer chunk out to disk.
*/
int Rdb_index_merge::merge_buf_write() {
- DBUG_ASSERT(m_merge_file.fd != -1);
+ DBUG_ASSERT(m_merge_file.m_fd != -1);
DBUG_ASSERT(m_rec_buf_unsorted != nullptr);
DBUG_ASSERT(m_output_buf != nullptr);
DBUG_ASSERT(!m_offset_tree.empty());
/* Write actual chunk size to first 8 bytes of the merge buffer */
- merge_store_uint64(m_output_buf->block.get(),
- m_rec_buf_unsorted->curr_offset + RDB_MERGE_CHUNK_LEN);
- m_output_buf->curr_offset += RDB_MERGE_CHUNK_LEN;
+ merge_store_uint64(m_output_buf->m_block.get(),
+ m_rec_buf_unsorted->m_curr_offset + RDB_MERGE_CHUNK_LEN);
+ m_output_buf->m_curr_offset += RDB_MERGE_CHUNK_LEN;
/*
Iterate through the offset tree. Should be ordered by the secondary key
at this point.
*/
for (const auto &rec : m_offset_tree) {
- DBUG_ASSERT(m_output_buf->curr_offset <= m_merge_buf_size);
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_merge_buf_size);
/* Read record from offset (should never fail) */
rocksdb::Slice key;
rocksdb::Slice val;
- merge_read_rec(rec.block, &key, &val);
+ merge_read_rec(rec.m_block, &key, &val);
/* Store key and value into sorted output buffer */
m_output_buf->store_key_value(key, val);
}
- DBUG_ASSERT(m_output_buf->curr_offset <= m_output_buf->total_size);
+ DBUG_ASSERT(m_output_buf->m_curr_offset <= m_output_buf->m_total_size);
/*
Write output buffer to disk.
@@ -189,11 +219,12 @@ int Rdb_index_merge::merge_buf_write() {
Need to position cursor to the chunk it needs to be at on filesystem
then write into the respective merge buffer.
*/
- if (my_seek(m_merge_file.fd, m_merge_file.num_sort_buffers * m_merge_buf_size,
- SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ if (my_seek(m_merge_file.m_fd,
+ m_merge_file.m_num_sort_buffers * m_merge_buf_size, SEEK_SET,
+ MYF(0)) == MY_FILEPOS_ERROR) {
// NO_LINT_DEBUG
sql_print_error("Error seeking to location in merge file on disk.");
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/*
@@ -201,16 +232,16 @@ int Rdb_index_merge::merge_buf_write() {
cache can flush out all of the files at the same time, causing a write
burst.
*/
- if (my_write(m_merge_file.fd, m_output_buf->block.get(),
- m_output_buf->total_size, MYF(MY_WME | MY_NABP)) ||
- mysql_file_sync(m_merge_file.fd, MYF(MY_WME))) {
+ if (my_write(m_merge_file.m_fd, m_output_buf->m_block.get(),
+ m_output_buf->m_total_size, MYF(MY_WME | MY_NABP)) ||
+ mysql_file_sync(m_merge_file.m_fd, MYF(MY_WME))) {
// NO_LINT_DEBUG
sql_print_error("Error writing sorted merge buffer to disk.");
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/* Increment merge file offset to track number of merge buffers written */
- m_merge_file.num_sort_buffers += 1;
+ m_merge_file.m_num_sort_buffers += 1;
/* Reset everything for next run */
merge_reset();
@@ -230,34 +261,35 @@ int Rdb_index_merge::merge_heap_prepare() {
be written to disk. Write them out now.
*/
if (!m_offset_tree.empty() && merge_buf_write()) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
- DBUG_ASSERT(m_merge_file.num_sort_buffers > 0);
+ DBUG_ASSERT(m_merge_file.m_num_sort_buffers > 0);
/*
For an n-way merge, we need to read chunks of each merge file
simultaneously.
*/
ulonglong chunk_size =
- m_merge_combine_read_size / m_merge_file.num_sort_buffers;
+ m_merge_combine_read_size / m_merge_file.m_num_sort_buffers;
if (chunk_size >= m_merge_buf_size) {
chunk_size = m_merge_buf_size;
}
/* Allocate buffers for each chunk */
- for (ulonglong i = 0; i < m_merge_file.num_sort_buffers; i++) {
- const auto entry = std::make_shared<merge_heap_entry>(m_comparator);
+ for (ulonglong i = 0; i < m_merge_file.m_num_sort_buffers; i++) {
+ const auto entry =
+ std::make_shared<merge_heap_entry>(m_cf_handle->GetComparator());
/*
Read chunk_size bytes from each chunk on disk, and place inside
respective chunk buffer.
*/
const size_t total_size =
- entry->prepare(m_merge_file.fd, i * m_merge_buf_size, chunk_size);
+ entry->prepare(m_merge_file.m_fd, i * m_merge_buf_size, chunk_size);
if (total_size == (size_t)-1) {
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/* Can reach this condition if an index was added on table w/ no rows */
@@ -266,10 +298,10 @@ int Rdb_index_merge::merge_heap_prepare() {
}
/* Read the first record from each buffer to initially populate the heap */
- if (entry->read_rec(&entry->key, &entry->val)) {
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
// NO_LINT_DEBUG
sql_print_error("Chunk size is too small to process merge.");
- return HA_ERR_INTERNAL_ERROR;
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
m_merge_min_heap.push(std::move(entry));
@@ -291,7 +323,7 @@ int Rdb_index_merge::next(rocksdb::Slice *const key,
If there are no sort buffer records (alters on empty tables),
also exit here.
*/
- if (m_merge_file.num_sort_buffers == 0) {
+ if (m_merge_file.m_num_sort_buffers == 0) {
if (m_offset_tree.empty()) {
return -1;
}
@@ -299,7 +331,7 @@ int Rdb_index_merge::next(rocksdb::Slice *const key,
const auto rec = m_offset_tree.begin();
/* Read record from offset */
- merge_read_rec(rec->block, key, val);
+ merge_read_rec(rec->m_block, key, val);
m_offset_tree.erase(rec);
return HA_EXIT_SUCCESS;
@@ -339,8 +371,8 @@ void Rdb_index_merge::merge_heap_top(rocksdb::Slice *const key,
DBUG_ASSERT(!m_merge_min_heap.empty());
const std::shared_ptr<merge_heap_entry> &entry = m_merge_min_heap.top();
- *key = entry->key;
- *val = entry->val;
+ *key = entry->m_key;
+ *val = entry->m_val;
}
/**
@@ -360,12 +392,12 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
/*
We are finished w/ current chunk if:
- current_offset + disk_offset == total_size
+ current_offset + disk_offset == m_total_size
Return without adding entry back onto heap.
If heap is also empty, we must be finished with merge.
*/
- if (entry->chunk_info->is_chunk_finished()) {
+ if (entry->m_chunk_info->is_chunk_finished()) {
if (m_merge_min_heap.empty()) {
return -1;
}
@@ -377,20 +409,20 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
/*
Make sure we haven't reached the end of the chunk.
*/
- DBUG_ASSERT(!entry->chunk_info->is_chunk_finished());
+ DBUG_ASSERT(!entry->m_chunk_info->is_chunk_finished());
/*
If merge_read_rec fails, it means the either the chunk was cut off
or we've reached the end of the respective chunk.
*/
- if (entry->read_rec(&entry->key, &entry->val)) {
- if (entry->read_next_chunk_from_disk(m_merge_file.fd)) {
- return HA_ERR_INTERNAL_ERROR;
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
+ if (entry->read_next_chunk_from_disk(m_merge_file.m_fd)) {
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
/* Try reading record again, should never fail. */
- if (entry->read_rec(&entry->key, &entry->val)) {
- return HA_ERR_INTERNAL_ERROR;
+ if (entry->read_rec(&entry->m_key, &entry->m_val)) {
+ return HA_ERR_ROCKSDB_MERGE_FILE_ERR;
}
}
@@ -403,32 +435,33 @@ int Rdb_index_merge::merge_heap_pop_and_get_next(rocksdb::Slice *const key,
}
int Rdb_index_merge::merge_heap_entry::read_next_chunk_from_disk(File fd) {
- if (chunk_info->read_next_chunk_from_disk(fd)) {
+ if (m_chunk_info->read_next_chunk_from_disk(fd)) {
return HA_EXIT_FAILURE;
}
- block = chunk_info->block.get();
+ m_block = m_chunk_info->m_block.get();
return HA_EXIT_SUCCESS;
}
int Rdb_index_merge::merge_buf_info::read_next_chunk_from_disk(File fd) {
- disk_curr_offset += curr_offset;
+ m_disk_curr_offset += m_curr_offset;
- if (my_seek(fd, disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
+ if (my_seek(fd, m_disk_curr_offset, SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR) {
// NO_LINT_DEBUG
sql_print_error("Error seeking to location in merge file on disk.");
return HA_EXIT_FAILURE;
}
/* Overwrite the old block */
- const size_t bytes_read = my_read(fd, block.get(), block_len, MYF(MY_WME));
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_block_len, MYF(MY_WME));
if (bytes_read == (size_t)-1) {
// NO_LINT_DEBUG
sql_print_error("Error reading merge file from disk.");
return HA_EXIT_FAILURE;
}
- curr_offset = 0;
+ m_curr_offset = 0;
return HA_EXIT_SUCCESS;
}
@@ -464,39 +497,39 @@ void Rdb_index_merge::read_slice(rocksdb::Slice *slice,
int Rdb_index_merge::merge_heap_entry::read_rec(rocksdb::Slice *const key,
rocksdb::Slice *const val) {
- const uchar *block_ptr = block;
- const auto orig_offset = chunk_info->curr_offset;
- const auto orig_block = block;
+ const uchar *block_ptr = m_block;
+ const auto orig_offset = m_chunk_info->m_curr_offset;
+ const auto orig_block = m_block;
/* Read key at block offset into key slice and the value into value slice*/
if (read_slice(key, &block_ptr) != 0) {
return HA_EXIT_FAILURE;
}
- chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block;
- block += (uintptr_t)block_ptr - (uintptr_t)block;
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
if (read_slice(val, &block_ptr) != 0) {
- chunk_info->curr_offset = orig_offset;
- block = orig_block;
+ m_chunk_info->m_curr_offset = orig_offset;
+ m_block = orig_block;
return HA_EXIT_FAILURE;
}
- chunk_info->curr_offset += (uintptr_t)block_ptr - (uintptr_t)block;
- block += (uintptr_t)block_ptr - (uintptr_t)block;
+ m_chunk_info->m_curr_offset += (uintptr_t)block_ptr - (uintptr_t)m_block;
+ m_block += (uintptr_t)block_ptr - (uintptr_t)m_block;
return HA_EXIT_SUCCESS;
}
int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice,
const uchar **block_ptr) {
- if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) {
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER)) {
return HA_EXIT_FAILURE;
}
uint64 slice_len;
merge_read_uint64(block_ptr, &slice_len);
- if (!chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) {
+ if (!m_chunk_info->has_space(RDB_MERGE_REC_DELIMITER + slice_len)) {
return HA_EXIT_FAILURE;
}
@@ -508,18 +541,18 @@ int Rdb_index_merge::merge_heap_entry::read_slice(rocksdb::Slice *const slice,
size_t Rdb_index_merge::merge_heap_entry::prepare(File fd, ulonglong f_offset,
ulonglong chunk_size) {
- chunk_info = std::make_shared<merge_buf_info>(chunk_size);
- const size_t res = chunk_info->prepare(fd, f_offset);
+ m_chunk_info = std::make_shared<merge_buf_info>(chunk_size);
+ const size_t res = m_chunk_info->prepare(fd, f_offset);
if (res != (size_t)-1) {
- block = chunk_info->block.get() + RDB_MERGE_CHUNK_LEN;
+ m_block = m_chunk_info->m_block.get() + RDB_MERGE_CHUNK_LEN;
}
return res;
}
size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
- disk_start_offset = f_offset;
- disk_curr_offset = f_offset;
+ m_disk_start_offset = f_offset;
+ m_disk_curr_offset = f_offset;
/*
Need to position cursor to the chunk it needs to be at on filesystem
@@ -531,7 +564,8 @@ size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
return (size_t)-1;
}
- const size_t bytes_read = my_read(fd, block.get(), total_size, MYF(MY_WME));
+ const size_t bytes_read =
+ my_read(fd, m_block.get(), m_total_size, MYF(MY_WME));
if (bytes_read == (size_t)-1) {
// NO_LINT_DEBUG
sql_print_error("Error reading merge file from disk.");
@@ -542,10 +576,10 @@ size_t Rdb_index_merge::merge_buf_info::prepare(File fd, ulonglong f_offset) {
Read the first 8 bytes of each chunk, this gives us the actual
size of each chunk.
*/
- const uchar *block_ptr = block.get();
- merge_read_uint64(&block_ptr, &total_size);
- curr_offset += RDB_MERGE_CHUNK_LEN;
- return total_size;
+ const uchar *block_ptr = m_block.get();
+ merge_read_uint64(&block_ptr, &m_total_size);
+ m_curr_offset += RDB_MERGE_CHUNK_LEN;
+ return m_total_size;
}
/* Store key and value w/ their respective delimiters at the given offset */
@@ -557,13 +591,13 @@ void Rdb_index_merge::merge_buf_info::store_key_value(
void Rdb_index_merge::merge_buf_info::store_slice(const rocksdb::Slice &slice) {
/* Store length delimiter */
- merge_store_uint64(&block[curr_offset], slice.size());
+ merge_store_uint64(&m_block[m_curr_offset], slice.size());
/* Store slice data */
- memcpy(&block[curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(),
+ memcpy(&m_block[m_curr_offset + RDB_MERGE_REC_DELIMITER], slice.data(),
slice.size());
- curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER;
+ m_curr_offset += slice.size() + RDB_MERGE_REC_DELIMITER;
}
void Rdb_index_merge::merge_reset() {
@@ -574,13 +608,13 @@ void Rdb_index_merge::merge_reset() {
m_offset_tree.clear();
/* Reset sort buffer block */
- if (m_rec_buf_unsorted && m_rec_buf_unsorted->block) {
- m_rec_buf_unsorted->curr_offset = 0;
+ if (m_rec_buf_unsorted && m_rec_buf_unsorted->m_block) {
+ m_rec_buf_unsorted->m_curr_offset = 0;
}
/* Reset output buf */
- if (m_output_buf && m_output_buf->block) {
- m_output_buf->curr_offset = 0;
+ if (m_output_buf && m_output_buf->m_block) {
+ m_output_buf->m_curr_offset = 0;
}
}
diff --git a/storage/rocksdb/rdb_index_merge.h b/storage/rocksdb/rdb_index_merge.h
index 9d1469fc34e..6e53663160a 100644
--- a/storage/rocksdb/rdb_index_merge.h
+++ b/storage/rocksdb/rdb_index_merge.h
@@ -49,23 +49,23 @@ class Rdb_index_merge {
Rdb_index_merge(const Rdb_index_merge &p) = delete;
Rdb_index_merge &operator=(const Rdb_index_merge &p) = delete;
-public:
+ public:
/* Information about temporary files used in external merge sort */
struct merge_file_info {
- File fd = -1; /* file descriptor */
- ulong num_sort_buffers; /* number of sort buffers in temp file */
+ File m_fd = -1; /* file descriptor */
+ ulong m_num_sort_buffers = 0; /* number of sort buffers in temp file */
};
/* Buffer for sorting in main memory. */
struct merge_buf_info {
/* heap memory allocated for main memory sort/merge */
- std::unique_ptr<uchar[]> block;
+ std::unique_ptr<uchar[]> m_block;
const ulonglong
- block_len; /* amount of data bytes allocated for block above */
- ulonglong curr_offset; /* offset of the record pointer for the block */
- ulonglong disk_start_offset; /* where the chunk starts on disk */
- ulonglong disk_curr_offset; /* current offset on disk */
- ulonglong total_size; /* total # of data bytes in chunk */
+ m_block_len; /* amount of data bytes allocated for block above */
+ ulonglong m_curr_offset; /* offset of the record pointer for the block */
+ ulonglong m_disk_start_offset; /* where the chunk starts on disk */
+ ulonglong m_disk_curr_offset; /* current offset on disk */
+ ulonglong m_total_size; /* total # of data bytes in chunk */
void store_key_value(const rocksdb::Slice &key, const rocksdb::Slice &val)
MY_ATTRIBUTE((__nonnull__));
@@ -78,32 +78,33 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
inline bool is_chunk_finished() const {
- return curr_offset + disk_curr_offset - disk_start_offset == total_size;
+ return m_curr_offset + m_disk_curr_offset - m_disk_start_offset ==
+ m_total_size;
}
inline bool has_space(uint64 needed) const {
- return curr_offset + needed <= block_len;
+ return m_curr_offset + needed <= m_block_len;
}
explicit merge_buf_info(const ulonglong merge_block_size)
- : block(nullptr), block_len(merge_block_size), curr_offset(0),
- disk_start_offset(0), disk_curr_offset(0),
- total_size(merge_block_size) {
+ : m_block(nullptr), m_block_len(merge_block_size), m_curr_offset(0),
+ m_disk_start_offset(0), m_disk_curr_offset(0),
+ m_total_size(merge_block_size) {
/* Will throw an exception if it runs out of memory here */
- block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
+ m_block = std::unique_ptr<uchar[]>(new uchar[merge_block_size]);
/* Initialize entire buffer to 0 to avoid valgrind errors */
- memset(block.get(), 0, merge_block_size);
+ memset(m_block.get(), 0, merge_block_size);
}
};
/* Represents an entry in the heap during merge phase of external sort */
struct merge_heap_entry {
- std::shared_ptr<merge_buf_info> chunk_info; /* pointer to buffer info */
- uchar *block; /* pointer to heap memory where record is stored */
- const rocksdb::Comparator *const comparator;
- rocksdb::Slice key; /* current key pointed to by block ptr */
- rocksdb::Slice val;
+ std::shared_ptr<merge_buf_info> m_chunk_info; /* pointer to buffer info */
+ uchar *m_block; /* pointer to heap memory where record is stored */
+ const rocksdb::Comparator *const m_comparator;
+ rocksdb::Slice m_key; /* current key pointed to by block ptr */
+ rocksdb::Slice m_val;
size_t prepare(File fd, ulonglong f_offset, ulonglong chunk_size)
MY_ATTRIBUTE((__nonnull__));
@@ -118,35 +119,37 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
explicit merge_heap_entry(const rocksdb::Comparator *const comparator)
- : chunk_info(nullptr), block(nullptr), comparator(comparator) {}
+ : m_chunk_info(nullptr), m_block(nullptr), m_comparator(comparator) {}
};
struct merge_heap_comparator {
bool operator()(const std::shared_ptr<merge_heap_entry> &lhs,
const std::shared_ptr<merge_heap_entry> &rhs) {
- return lhs->comparator->Compare(rhs->key, lhs->key) < 0;
+ return lhs->m_comparator->Compare(rhs->m_key, lhs->m_key) < 0;
}
};
/* Represents a record in unsorted buffer */
struct merge_record {
- uchar *block; /* points to offset of key in sort buffer */
- const rocksdb::Comparator *const comparator;
+ uchar *m_block; /* points to offset of key in sort buffer */
+ const rocksdb::Comparator *const m_comparator;
bool operator<(const merge_record &record) const {
- return merge_record_compare(this->block, record.block, comparator) < 0;
+ return merge_record_compare(this->m_block, record.m_block, m_comparator) <
+ 0;
}
merge_record(uchar *const block,
const rocksdb::Comparator *const comparator)
- : block(block), comparator(comparator) {}
+ : m_block(block), m_comparator(comparator) {}
};
-private:
+ private:
const char *m_tmpfile_path;
const ulonglong m_merge_buf_size;
const ulonglong m_merge_combine_read_size;
- const rocksdb::Comparator *m_comparator;
+ const ulonglong m_merge_tmp_file_removal_delay;
+ rocksdb::ColumnFamilyHandle *m_cf_handle;
struct merge_file_info m_merge_file;
std::shared_ptr<merge_buf_info> m_rec_buf_unsorted;
std::shared_ptr<merge_buf_info> m_output_buf;
@@ -184,11 +187,12 @@ private:
void read_slice(rocksdb::Slice *slice, const uchar *block_ptr)
MY_ATTRIBUTE((__nonnull__));
-public:
+ public:
Rdb_index_merge(const char *const tmpfile_path,
const ulonglong &merge_buf_size,
const ulonglong &merge_combine_read_size,
- const rocksdb::Comparator *const comparator);
+ const ulonglong &merge_tmp_file_removal_delay,
+ rocksdb::ColumnFamilyHandle *cf);
~Rdb_index_merge();
int init() MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -213,6 +217,8 @@ public:
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
void merge_reset();
+
+ rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; }
};
-} // namespace myrocks
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_io_watchdog.cc b/storage/rocksdb/rdb_io_watchdog.cc
new file mode 100644
index 00000000000..7b229eee47d
--- /dev/null
+++ b/storage/rocksdb/rdb_io_watchdog.cc
@@ -0,0 +1,233 @@
+/*
+ Copyright (c) 2017, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* This C++ file's header */
+#include "./rdb_io_watchdog.h"
+
+/* C++ standard header files */
+#include <string>
+#include <vector>
+
+namespace myrocks {
+
+void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
+ DBUG_ASSERT(timer_data.sival_ptr != nullptr);
+
+ // The treatment of any pending signal generated by the deleted timer is
+ // unspecified. Therefore we still need to handle the rare case where we
+ // finished the I/O operation right before the timer was deleted and callback
+ // was in flight.
+ if (!m_io_in_progress.load()) {
+ return;
+ }
+
+ // At this point we know that I/O has been stuck in `write()` for more than
+ // `m_write_timeout` seconds. We'll log a message and shut down the service.
+ // NO_LINT_DEBUG
+ sql_print_error("MyRocks has detected a combination of I/O requests which "
+ "have cumulatively been blocking for more than %u seconds. "
+ "Shutting the service down.",
+ m_write_timeout);
+
+ abort_with_stack_traces();
+}
+
+void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
+ RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
+
+ DBUG_ASSERT(timer_data.sival_ptr != nullptr);
+
+ struct sigevent e;
+
+ e.sigev_notify = SIGEV_THREAD;
+ e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
+ e.sigev_value.sival_ptr = this;
+ e.sigev_notify_attributes = nullptr;
+
+ int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return;
+ }
+
+ struct itimerspec timer_spec;
+ memset(&timer_spec, 0, sizeof(timer_spec));
+
+ // One time execution only for the watchdog. No interval.
+ timer_spec.it_value.tv_sec = m_write_timeout;
+
+ ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
+ errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return;
+ }
+
+ m_io_in_progress.store(true);
+
+ // Verify the write access to all directories we care about.
+ for (const std::string &directory : m_dirs_to_check) {
+ ret = check_write_access(directory);
+
+ // We'll log a warning and attempt to continue to see if the problem happens
+ // in other cases as well.
+ if (unlikely(ret != HA_EXIT_SUCCESS)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Unable to verify write access to %s (error code %d).",
+ directory.c_str(), ret);
+ }
+ }
+
+ m_io_in_progress.store(false);
+
+ // Clean up the watchdog timer.
+ ret = timer_delete(m_io_check_watchdog_timer);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
+ }
+
+ m_io_check_watchdog_timer = nullptr;
+
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+}
+
+int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
+ DBUG_ASSERT(!dirname.empty());
+ DBUG_ASSERT(m_buf != nullptr);
+
+ const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;
+
+ // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
+ // buffer cache.
+ int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
+ S_IRWXU | S_IWUSR);
+
+ if (unlikely(fd == -1)) {
+ return fd;
+ }
+
+ int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);
+
+ if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
+ return ret;
+ }
+
+ ret = close(fd);
+
+ if (unlikely(ret)) {
+ return ret;
+ }
+
+ ret = unlink(fname.c_str());
+
+ if (unlikely(ret)) {
+ return ret;
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) {
+ // This function will be called either from a thread initializing MyRocks
+ // engine or handling system variable changes. We need to account for the
+ // possibility of I/O callback executing at the same time. If that happens
+ // then we'll wait for it to finish.
+ RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
+
+ struct sigevent e;
+
+ // In all the cases all the active timers needs to be stopped.
+ int ret = stop_timers();
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Stopping I/O timers failed with %d.", errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return ret;
+ }
+
+ m_write_timeout = write_timeout;
+ m_io_in_progress.store(false);
+
+ // Zero means that the I/O timer will be disabled. Therefore there's nothing
+ // for us to do here.
+ if (!write_timeout) {
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return HA_EXIT_SUCCESS;
+ }
+
+ free(m_buf);
+
+ ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
+ RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);
+
+ if (unlikely(ret)) {
+ m_buf = nullptr;
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ // NB! The value of errno is not set.
+ return ret;
+ }
+
+ DBUG_ASSERT(m_buf != nullptr);
+ memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);
+
+ // Common case gets handled here - we'll create a timer with a specific
+ // interval to check a set of directories for write access.
+ DBUG_ASSERT(m_dirs_to_check.size() > 0);
+
+ e.sigev_notify = SIGEV_THREAD;
+ e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
+ e.sigev_value.sival_ptr = this;
+ e.sigev_notify_attributes = nullptr;
+
+ ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Creating a I/O timer failed with %d.", errno);
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+ return ret;
+ }
+
+ struct itimerspec timer_spec;
+ memset(&timer_spec, 0, sizeof(timer_spec));
+
+ // I/O timer will need to execute on a certain interval.
+ timer_spec.it_value.tv_sec = m_write_timeout;
+ timer_spec.it_interval.tv_sec = m_write_timeout;
+
+ ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);
+
+ if (unlikely(ret)) {
+ // NO_LINT_DEBUG
+ sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
+ errno);
+ }
+
+ RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
+
+ return HA_EXIT_SUCCESS;
+}
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_io_watchdog.h b/storage/rocksdb/rdb_io_watchdog.h
new file mode 100644
index 00000000000..0fb77536fb0
--- /dev/null
+++ b/storage/rocksdb/rdb_io_watchdog.h
@@ -0,0 +1,113 @@
+/*
+ Copyright (c) 2017, Facebook, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#pragma once
+
+/* C++ standard header files */
+#include <atomic>
+#include <signal.h>
+#include <stdlib.h>
+#include <string>
+#include <string.h>
+#include <time.h>
+#include <vector>
+
+/* MySQL header files */
+#include "./my_global.h"
+#include "./my_stacktrace.h"
+
+/* MyRocks header files */
+#include "./rdb_utils.h"
+
+namespace myrocks {
+
+class Rdb_io_watchdog {
+ const int RDB_IO_WRITE_BUFFER_SIZE = 4096;
+ const char *const RDB_IO_DUMMY_FILE_NAME = "myrocks_io_watchdog_write_file";
+
+ private:
+ timer_t m_io_check_timer, m_io_check_watchdog_timer;
+ std::atomic<bool> m_io_in_progress;
+ std::vector<std::string> m_dirs_to_check;
+ uint32_t m_write_timeout;
+ mysql_mutex_t m_reset_mutex;
+ char *m_buf;
+
+ int check_write_access(const std::string &dirname) const;
+ void io_check_callback(union sigval timer_data);
+ void expire_io_callback(union sigval timer_data);
+
+ int stop_timers() {
+ int ret = 0;
+
+ if (m_io_check_watchdog_timer) {
+ ret = timer_delete(m_io_check_watchdog_timer);
+
+ if (!ret) {
+ m_io_check_watchdog_timer = nullptr;
+ }
+ }
+
+ if (m_io_check_timer && !ret) {
+ ret = timer_delete(m_io_check_timer);
+
+ if (!ret) {
+ m_io_check_timer = nullptr;
+ }
+ }
+
+ return ret;
+ }
+
+ static void io_check_callback_wrapper(union sigval timer_data) {
+ Rdb_io_watchdog *io_watchdog =
+ static_cast<Rdb_io_watchdog *>(timer_data.sival_ptr);
+ DBUG_ASSERT(io_watchdog != nullptr);
+
+ io_watchdog->io_check_callback(timer_data);
+ }
+
+ static void expire_io_callback_wrapper(union sigval timer_data) {
+ Rdb_io_watchdog *io_watchdog =
+ static_cast<Rdb_io_watchdog *>(timer_data.sival_ptr);
+ DBUG_ASSERT(io_watchdog != nullptr);
+
+ io_watchdog->expire_io_callback(timer_data);
+ }
+
+ public:
+ explicit Rdb_io_watchdog(const std::vector<std::string> &directories)
+ : m_io_check_timer(nullptr), m_io_check_watchdog_timer(nullptr),
+ m_io_in_progress(false), m_dirs_to_check(std::move(directories)),
+ m_buf(nullptr) {
+ DBUG_ASSERT(m_dirs_to_check.size() > 0);
+ mysql_mutex_init(0, &m_reset_mutex, MY_MUTEX_INIT_FAST);
+ }
+
+ ~Rdb_io_watchdog() {
+ // We're shutting down. Ignore errors possibly coming from timer deletion.
+ static_cast<void>(stop_timers());
+ mysql_mutex_destroy(&m_reset_mutex);
+ free(m_buf);
+ }
+
+ int reset_timeout(const uint32_t &write_timeout);
+
+ Rdb_io_watchdog(const Rdb_io_watchdog &) = delete;
+ Rdb_io_watchdog &operator=(const Rdb_io_watchdog &) = delete;
+};
+
+} // namespace myrocks
diff --git a/storage/rocksdb/rdb_mariadb_server_port.h b/storage/rocksdb/rdb_mariadb_server_port.h
index e424fbb91f8..a1b42b9d4bb 100644
--- a/storage/rocksdb/rdb_mariadb_server_port.h
+++ b/storage/rocksdb/rdb_mariadb_server_port.h
@@ -70,4 +70,37 @@ class Regex_list_handler
void warn_about_bad_patterns(const Regex_list_handler* regex_list_handler,
const char *name);
+void print_keydup_error(TABLE *table, KEY *key, myf errflag,
+ const THD *thd, const char *org_table_name=NULL);
+
+// Split a string based on a delimiter. Two delimiters in a row will not add
+// an empty string in the set.
+inline
+std::vector<std::string> split_into_vector(const std::string& input,
+ char delimiter)
+{
+ size_t pos;
+ size_t start = 0;
+ std::vector<std::string> elems;
+
+ // Find next delimiter
+ while ((pos = input.find(delimiter, start)) != std::string::npos)
+ {
+ // If there is any data since the last delimiter add it to the list
+ if (pos > start)
+ elems.push_back(input.substr(start, pos - start));
+
+ // Set our start position to the character after the delimiter
+ start = pos + 1;
+ }
+
+ // Add a possible string since the last delimiter
+ if (input.length() > start)
+ elems.push_back(input.substr(start));
+
+ // Return the resulting list back to the caller
+ return elems;
+}
+
+
#endif
diff --git a/storage/rocksdb/rdb_perf_context.cc b/storage/rocksdb/rdb_perf_context.cc
index 7fdee15157e..291d38b68f3 100644
--- a/storage/rocksdb/rdb_perf_context.cc
+++ b/storage/rocksdb/rdb_perf_context.cc
@@ -86,14 +86,14 @@ std::string rdb_pc_stat_types[] = {
#define IO_PERF_RECORD(_field_) \
do { \
- if (rocksdb::perf_context._field_ > 0) \
- counters->m_value[idx] += rocksdb::perf_context._field_; \
+ if (rocksdb::get_perf_context()->_field_ > 0) \
+ counters->m_value[idx] += rocksdb::get_perf_context()->_field_; \
idx++; \
} while (0)
#define IO_STAT_RECORD(_field_) \
do { \
- if (rocksdb::iostats_context._field_ > 0) \
- counters->m_value[idx] += rocksdb::iostats_context._field_; \
+ if (rocksdb::get_iostats_context()->_field_ > 0) \
+ counters->m_value[idx] += rocksdb::get_iostats_context()->_field_; \
idx++; \
} while (0)
@@ -174,11 +174,21 @@ bool Rdb_io_perf::start(const uint32_t perf_context_level) {
return false;
}
- rocksdb::perf_context.Reset();
- rocksdb::iostats_context.Reset();
+ rocksdb::get_perf_context()->Reset();
+ rocksdb::get_iostats_context()->Reset();
return true;
}
+void Rdb_io_perf::update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written) {
+ const rocksdb::PerfLevel perf_level =
+ static_cast<rocksdb::PerfLevel>(perf_context_level);
+ if (perf_level != rocksdb::kDisable && m_shared_io_perf_write) {
+ io_write_bytes += bytes_written;
+ io_write_requests += 1;
+ }
+}
+
void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
const rocksdb::PerfLevel perf_level =
static_cast<rocksdb::PerfLevel>(perf_context_level);
@@ -192,22 +202,23 @@ void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
}
harvest_diffs(&rdb_global_perf_counters);
- if (m_shared_io_perf_read && (rocksdb::perf_context.block_read_byte != 0 ||
- rocksdb::perf_context.block_read_count != 0 ||
- rocksdb::perf_context.block_read_time != 0)) {
+ if (m_shared_io_perf_read &&
+ (rocksdb::get_perf_context()->block_read_byte != 0 ||
+ rocksdb::get_perf_context()->block_read_count != 0 ||
+ rocksdb::get_perf_context()->block_read_time != 0)) {
#ifdef MARIAROCKS_NOT_YET
my_io_perf_t io_perf_read;
io_perf_read.init();
- io_perf_read.bytes = rocksdb::perf_context.block_read_byte;
- io_perf_read.requests = rocksdb::perf_context.block_read_count;
+ io_perf_read.bytes = rocksdb::get_perf_context()->block_read_byte;
+ io_perf_read.requests = rocksdb::get_perf_context()->block_read_count;
/*
Rocksdb does not distinguish between I/O service and wait time, so just
use svc time.
*/
io_perf_read.svc_time_max = io_perf_read.svc_time =
- rocksdb::perf_context.block_read_time;
+ rocksdb::get_perf_context()->block_read_time;
m_shared_io_perf_read->sum(io_perf_read);
m_stats->table_io_perf_read.sum(io_perf_read);
@@ -215,14 +226,27 @@ void Rdb_io_perf::end_and_record(const uint32_t perf_context_level) {
}
#ifdef MARIAROCKS_NOT_YET
+ if (m_shared_io_perf_write &&
+ (io_write_bytes != 0 || io_write_requests != 0)) {
+ my_io_perf_t io_perf_write;
+ io_perf_write.init();
+ io_perf_write.bytes = io_write_bytes;
+ io_perf_write.requests = io_write_requests;
+ m_shared_io_perf_write->sum(io_perf_write);
+ m_stats->table_io_perf_write.sum(io_perf_write);
+ io_write_bytes = 0;
+ io_write_requests = 0;
+ }
+
if (m_stats) {
- if (rocksdb::perf_context.internal_key_skipped_count != 0) {
- m_stats->key_skipped += rocksdb::perf_context.internal_key_skipped_count;
+ if (rocksdb::get_perf_context()->internal_key_skipped_count != 0) {
+ m_stats->key_skipped +=
+ rocksdb::get_perf_context()->internal_key_skipped_count;
}
- if (rocksdb::perf_context.internal_delete_skipped_count != 0) {
+ if (rocksdb::get_perf_context()->internal_delete_skipped_count != 0) {
m_stats->delete_skipped +=
- rocksdb::perf_context.internal_delete_skipped_count;
+ rocksdb::get_perf_context()->internal_delete_skipped_count;
}
}
#endif
diff --git a/storage/rocksdb/rdb_perf_context.h b/storage/rocksdb/rdb_perf_context.h
index 9d580ff0b8a..f9b9fd48d3e 100644
--- a/storage/rocksdb/rdb_perf_context.h
+++ b/storage/rocksdb/rdb_perf_context.h
@@ -110,30 +110,42 @@ class Rdb_io_perf {
// Context management
Rdb_atomic_perf_counters *m_atomic_counters = nullptr;
my_io_perf_atomic_t *m_shared_io_perf_read = nullptr;
+ my_io_perf_atomic_t *m_shared_io_perf_write = nullptr;
ha_statistics *m_stats = nullptr;
-public:
+ uint64_t io_write_bytes;
+ uint64_t io_write_requests;
+
+ public:
Rdb_io_perf(const Rdb_io_perf &) = delete;
Rdb_io_perf &operator=(const Rdb_io_perf &) = delete;
void init(Rdb_atomic_perf_counters *const atomic_counters,
my_io_perf_atomic_t *const shared_io_perf_read,
+ my_io_perf_atomic_t *const shared_io_perf_write,
ha_statistics *const stats) {
DBUG_ASSERT(atomic_counters != nullptr);
DBUG_ASSERT(shared_io_perf_read != nullptr);
+ DBUG_ASSERT(shared_io_perf_write != nullptr);
DBUG_ASSERT(stats != nullptr);
m_atomic_counters = atomic_counters;
m_shared_io_perf_read = shared_io_perf_read;
+ m_shared_io_perf_write = shared_io_perf_write;
m_stats = stats;
+
+ io_write_bytes = 0;
+ io_write_requests = 0;
}
bool start(const uint32_t perf_context_level);
+ void update_bytes_written(const uint32_t perf_context_level,
+ ulonglong bytes_written);
void end_and_record(const uint32_t perf_context_level);
explicit Rdb_io_perf()
: m_atomic_counters(nullptr), m_shared_io_perf_read(nullptr),
- m_stats(nullptr) {}
+ m_stats(nullptr), io_write_bytes(0), io_write_requests(0) {}
};
} // namespace myrocks
diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc
index 34d32aeb347..8ca00bb30f5 100644
--- a/storage/rocksdb/rdb_sst_info.cc
+++ b/storage/rocksdb/rdb_sst_info.cc
@@ -27,6 +27,7 @@
/* C++ standard header files */
#include <cstdio>
#include <string>
+#include <utility>
#include <vector>
/* MySQL header files */
@@ -45,17 +46,17 @@
namespace myrocks {
-Rdb_sst_file::Rdb_sst_file(rocksdb::DB *const db,
- rocksdb::ColumnFamilyHandle *const cf,
- const rocksdb::DBOptions &db_options,
- const std::string &name, const bool tracing)
+Rdb_sst_file_ordered::Rdb_sst_file::Rdb_sst_file(
+ rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options, const std::string &name,
+ const bool tracing)
: m_db(db), m_cf(cf), m_db_options(db_options), m_sst_file_writer(nullptr),
- m_name(name), m_tracing(tracing) {
+ m_name(name), m_tracing(tracing), m_comparator(cf->GetComparator()) {
DBUG_ASSERT(db != nullptr);
DBUG_ASSERT(cf != nullptr);
}
-Rdb_sst_file::~Rdb_sst_file() {
+Rdb_sst_file_ordered::Rdb_sst_file::~Rdb_sst_file() {
// Make sure we clean up
delete m_sst_file_writer;
m_sst_file_writer = nullptr;
@@ -66,7 +67,7 @@ Rdb_sst_file::~Rdb_sst_file() {
std::remove(m_name.c_str());
}
-rocksdb::Status Rdb_sst_file::open() {
+rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::open() {
DBUG_ASSERT(m_sst_file_writer == nullptr);
rocksdb::ColumnFamilyDescriptor cf_descr;
@@ -77,13 +78,11 @@ rocksdb::Status Rdb_sst_file::open() {
}
// Create an sst file writer with the current options and comparator
- const rocksdb::Comparator *comparator = m_cf->GetComparator();
-
const rocksdb::EnvOptions env_options(m_db_options);
const rocksdb::Options options(m_db_options, cf_descr.options);
m_sst_file_writer =
- new rocksdb::SstFileWriter(env_options, options, comparator, m_cf);
+ new rocksdb::SstFileWriter(env_options, options, m_comparator, m_cf);
s = m_sst_file_writer->Open(m_name);
if (m_tracing) {
@@ -100,15 +99,19 @@ rocksdb::Status Rdb_sst_file::open() {
return s;
}
-rocksdb::Status Rdb_sst_file::put(const rocksdb::Slice &key,
- const rocksdb::Slice &value) {
+rocksdb::Status
+Rdb_sst_file_ordered::Rdb_sst_file::put(const rocksdb::Slice &key,
+ const rocksdb::Slice &value) {
DBUG_ASSERT(m_sst_file_writer != nullptr);
// Add the specified key/value to the sst file writer
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return m_sst_file_writer->Add(key, value);
}
-std::string Rdb_sst_file::generateKey(const std::string &key) {
+std::string
+Rdb_sst_file_ordered::Rdb_sst_file::generateKey(const std::string &key) {
static char const hexdigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
@@ -125,7 +128,7 @@ std::string Rdb_sst_file::generateKey(const std::string &key) {
}
// This function is run by the background thread
-rocksdb::Status Rdb_sst_file::commit() {
+rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::commit() {
DBUG_ASSERT(m_sst_file_writer != nullptr);
rocksdb::Status s;
@@ -175,13 +178,153 @@ rocksdb::Status Rdb_sst_file::commit() {
return s;
}
+void Rdb_sst_file_ordered::Rdb_sst_stack::push(const rocksdb::Slice &key,
+ const rocksdb::Slice &value) {
+ if (m_buffer == nullptr) {
+ m_buffer = new char[m_buffer_size];
+ }
+
+  // Put the actual key and value data onto our stack
+ size_t key_offset = m_offset;
+ memcpy(m_buffer + m_offset, key.data(), key.size());
+ m_offset += key.size();
+ memcpy(m_buffer + m_offset, value.data(), value.size());
+ m_offset += value.size();
+
+ // Push just the offset, the key length and the value length onto the stack
+ m_stack.push(std::make_tuple(key_offset, key.size(), value.size()));
+}
+
+std::pair<rocksdb::Slice, rocksdb::Slice>
+Rdb_sst_file_ordered::Rdb_sst_stack::top() {
+ size_t offset, key_len, value_len;
+ // Pop the next item off the internal stack
+ std::tie(offset, key_len, value_len) = m_stack.top();
+
+ // Make slices from the offset (first), key length (second), and value
+ // length (third)
+ DBUG_ASSERT(m_buffer != nullptr);
+ rocksdb::Slice key(m_buffer + offset, key_len);
+ rocksdb::Slice value(m_buffer + offset + key_len, value_len);
+
+ return std::make_pair(key, value);
+}
+
+Rdb_sst_file_ordered::Rdb_sst_file_ordered(
+ rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options, const std::string &name,
+ const bool tracing, size_t max_size)
+ : m_use_stack(false), m_first(true), m_stack(max_size),
+ m_file(db, cf, db_options, name, tracing) {
+ m_stack.reset();
+}
+
+rocksdb::Status Rdb_sst_file_ordered::apply_first() {
+ rocksdb::Slice first_key_slice(m_first_key);
+ rocksdb::Slice first_value_slice(m_first_value);
+ rocksdb::Status s;
+
+ if (m_use_stack) {
+ // Put the first key onto the stack
+ m_stack.push(first_key_slice, first_value_slice);
+ } else {
+ // Put the first key into the SST
+ s = m_file.put(first_key_slice, first_value_slice);
+ if (!s.ok()) {
+ return s;
+ }
+ }
+
+ // Clear out the 'first' strings for next key/value
+ m_first_key.clear();
+ m_first_value.clear();
+
+ return s;
+}
+
+rocksdb::Status Rdb_sst_file_ordered::put(const rocksdb::Slice &key,
+ const rocksdb::Slice &value) {
+ rocksdb::Status s;
+
+ // If this is the first key, just store a copy of the key and value
+ if (m_first) {
+ m_first_key = key.ToString();
+ m_first_value = value.ToString();
+ m_first = false;
+ return rocksdb::Status::OK();
+ }
+
+ // If the first key is not empty we must be the second key. Compare the
+ // new key with the first key to determine if the data will go straight
+  // to the SST or be put on the stack to be retrieved later.
+ if (!m_first_key.empty()) {
+ rocksdb::Slice first_key_slice(m_first_key);
+ int cmp = m_file.compare(first_key_slice, key);
+ DBUG_ASSERT(cmp != 0);
+ m_use_stack = (cmp > 0);
+
+ // Apply the first key to the stack or SST
+ s = apply_first();
+ if (!s.ok()) {
+ return s;
+ }
+ }
+
+ // Put this key on the stack or into the SST
+ if (m_use_stack) {
+ m_stack.push(key, value);
+ } else {
+ s = m_file.put(key, value);
+ }
+
+ return s;
+}
+
+rocksdb::Status Rdb_sst_file_ordered::commit() {
+ rocksdb::Status s;
+
+ // Make sure we get the first key if it was the only key given to us.
+ if (!m_first_key.empty()) {
+ s = apply_first();
+ if (!s.ok()) {
+ return s;
+ }
+ }
+
+ if (m_use_stack) {
+ rocksdb::Slice key;
+ rocksdb::Slice value;
+
+ // We are ready to commit, pull each entry off the stack (which reverses
+ // the original data) and send it to the SST file.
+ while (!m_stack.empty()) {
+ std::tie(key, value) = m_stack.top();
+ s = m_file.put(key, value);
+ if (!s.ok()) {
+ return s;
+ }
+
+ m_stack.pop();
+ }
+
+ // We have pulled everything off the stack, reset for the next time
+ m_stack.reset();
+ m_use_stack = false;
+ }
+
+ // reset m_first
+ m_first = true;
+
+ return m_file.commit();
+}
+
Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
const std::string &indexname,
rocksdb::ColumnFamilyHandle *const cf,
const rocksdb::DBOptions &db_options,
const bool &tracing)
: m_db(db), m_cf(cf), m_db_options(db_options), m_curr_size(0),
- m_sst_count(0), m_error_msg(""),
+ m_sst_count(0), m_background_error(HA_EXIT_SUCCESS),
#if defined(RDB_SST_INFO_USE_THREAD)
m_queue(), m_mutex(), m_cond(), m_thread(nullptr), m_finished(false),
#endif
@@ -228,15 +371,16 @@ int Rdb_sst_info::open_new_sst_file() {
const std::string name = m_prefix + std::to_string(m_sst_count++) + m_suffix;
// Create the new sst file object
- m_sst_file = new Rdb_sst_file(m_db, m_cf, m_db_options, name, m_tracing);
+ m_sst_file = new Rdb_sst_file_ordered(m_db, m_cf, m_db_options,
+ name, m_tracing, m_max_size);
// Open the sst file
const rocksdb::Status s = m_sst_file->open();
if (!s.ok()) {
- set_error_msg(m_sst_file->get_name(), s.ToString());
+ set_error_msg(m_sst_file->get_name(), s);
delete m_sst_file;
m_sst_file = nullptr;
- return HA_EXIT_FAILURE;
+ return HA_ERR_ROCKSDB_BULK_LOAD;
}
m_curr_size = 0;
@@ -267,7 +411,8 @@ void Rdb_sst_info::close_curr_sst_file() {
#else
const rocksdb::Status s = m_sst_file->commit();
if (!s.ok()) {
- set_error_msg(m_sst_file->get_name(), s.ToString());
+ set_error_msg(m_sst_file->get_name(), s);
+ set_background_error(HA_ERR_ROCKSDB_BULK_LOAD);
}
delete m_sst_file;
@@ -281,14 +426,14 @@ void Rdb_sst_info::close_curr_sst_file() {
int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
int rc;
- if (m_curr_size >= m_max_size) {
+ if (m_curr_size + key.size() + value.size() >= m_max_size) {
// The current sst file has reached its maximum, close it out
close_curr_sst_file();
// While we are here, check to see if we have had any errors from the
// background thread - we don't want to wait for the end to report them
- if (!m_error_msg.empty()) {
- return HA_EXIT_FAILURE;
+ if (have_background_error()) {
+ return get_and_reset_background_error();
}
}
@@ -305,8 +450,8 @@ int Rdb_sst_info::put(const rocksdb::Slice &key, const rocksdb::Slice &value) {
// Add the key/value to the current sst file
const rocksdb::Status s = m_sst_file->put(key, value);
if (!s.ok()) {
- set_error_msg(m_sst_file->get_name(), s.ToString());
- return HA_EXIT_FAILURE;
+ set_error_msg(m_sst_file->get_name(), s);
+ return HA_ERR_ROCKSDB_BULK_LOAD;
}
m_curr_size += key.size() + value.size();
@@ -334,25 +479,36 @@ int Rdb_sst_info::commit() {
#endif
// Did we get any errors?
- if (!m_error_msg.empty()) {
- return HA_EXIT_FAILURE;
+ if (have_background_error()) {
+ return get_and_reset_background_error();
}
return HA_EXIT_SUCCESS;
}
void Rdb_sst_info::set_error_msg(const std::string &sst_file_name,
- const std::string &msg) {
+ const rocksdb::Status &s) {
#if defined(RDB_SST_INFO_USE_THREAD)
// Both the foreground and background threads can set the error message
// so lock the mutex to protect it. We only want the first error that
// we encounter.
const std::lock_guard<std::mutex> guard(m_mutex);
#endif
- my_printf_error(ER_UNKNOWN_ERROR, "[%s] bulk load error: %s", MYF(0),
- sst_file_name.c_str(), msg.c_str());
- if (m_error_msg.empty()) {
- m_error_msg = "[" + sst_file_name + "] " + msg;
+ if (s.IsInvalidArgument() &&
+ strcmp(s.getState(), "Keys must be added in order") == 0) {
+ my_printf_error(ER_KEYS_OUT_OF_ORDER,
+ "Rows must be inserted in primary key order "
+ "during bulk load operation",
+ MYF(0));
+ } else if (s.IsInvalidArgument() &&
+ strcmp(s.getState(), "Global seqno is required, but disabled") ==
+ 0) {
+ my_printf_error(ER_OVERLAPPING_KEYS, "Rows inserted during bulk load "
+ "must not overlap existing rows",
+ MYF(0));
+ } else {
+ my_printf_error(ER_UNKNOWN_ERROR, "[%s] bulk load error: %s", MYF(0),
+ sst_file_name.c_str(), s.ToString().c_str());
}
}
@@ -363,15 +519,15 @@ void Rdb_sst_info::thread_fcn(void *object) {
}
void Rdb_sst_info::run_thread() {
- const std::unique_lock<std::mutex> lk(m_mutex);
+ std::unique_lock<std::mutex> lk(m_mutex);
do {
// Wait for notification or 1 second to pass
m_cond.wait_for(lk, std::chrono::seconds(1));
- // Inner loop pulls off all Rdb_sst_file entries and processes them
+ // Inner loop pulls off all Rdb_sst_file_ordered entries and processes them
while (!m_queue.empty()) {
- const Rdb_sst_file *const sst_file = m_queue.front();
+ Rdb_sst_file_ordered *const sst_file = m_queue.front();
m_queue.pop();
// Release the lock - we don't want to hold it while committing the file
@@ -380,7 +536,8 @@ void Rdb_sst_info::run_thread() {
// Close out the sst file and add it to the database
const rocksdb::Status s = sst_file->commit();
if (!s.ok()) {
- set_error_msg(sst_file->get_name(), s.ToString());
+ set_error_msg(sst_file->get_name(), s);
+ set_background_error(HA_ERR_ROCKSDB_BULK_LOAD);
}
delete sst_file;
diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h
index 4211ec6340d..1dee0fd0518 100644
--- a/storage/rocksdb/rdb_sst_info.h
+++ b/storage/rocksdb/rdb_sst_info.h
@@ -21,46 +21,100 @@
#include <condition_variable>
#include <mutex>
#include <queue>
+#include <stack>
#include <string>
#include <thread>
+#include <utility>
#include <vector>
/* RocksDB header files */
#include "rocksdb/db.h"
#include "rocksdb/sst_file_writer.h"
-// define RDB_SST_INFO_USE_THREAD /* uncomment to use threads */
+/* MyRocks header files */
+#include "./rdb_utils.h"
-namespace myrocks {
-
-class Rdb_sst_file {
-private:
- Rdb_sst_file(const Rdb_sst_file &p) = delete;
- Rdb_sst_file &operator=(const Rdb_sst_file &p) = delete;
-
- rocksdb::DB *const m_db;
- rocksdb::ColumnFamilyHandle *const m_cf;
- const rocksdb::DBOptions &m_db_options;
- rocksdb::SstFileWriter *m_sst_file_writer;
- const std::string m_name;
- const bool m_tracing;
-
- std::string generateKey(const std::string &key);
+// #define RDB_SST_INFO_USE_THREAD /* uncomment to use threads */
-public:
- Rdb_sst_file(rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
- const rocksdb::DBOptions &db_options, const std::string &name,
- const bool tracing);
- ~Rdb_sst_file();
+namespace myrocks {
- rocksdb::Status open();
+class Rdb_sst_file_ordered {
+ private:
+ class Rdb_sst_file {
+ private:
+ Rdb_sst_file(const Rdb_sst_file &p) = delete;
+ Rdb_sst_file &operator=(const Rdb_sst_file &p) = delete;
+
+ rocksdb::DB *const m_db;
+ rocksdb::ColumnFamilyHandle *const m_cf;
+ const rocksdb::DBOptions &m_db_options;
+ rocksdb::SstFileWriter *m_sst_file_writer;
+ const std::string m_name;
+ const bool m_tracing;
+ const rocksdb::Comparator *m_comparator;
+
+ std::string generateKey(const std::string &key);
+
+ public:
+ Rdb_sst_file(rocksdb::DB *const db, rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options, const std::string &name,
+ const bool tracing);
+ ~Rdb_sst_file();
+
+ rocksdb::Status open();
+ rocksdb::Status put(const rocksdb::Slice &key, const rocksdb::Slice &value);
+ rocksdb::Status commit();
+
+ inline const std::string get_name() const { return m_name; }
+ inline int compare(rocksdb::Slice key1, rocksdb::Slice key2) {
+ return m_comparator->Compare(key1, key2);
+ }
+ };
+
+ class Rdb_sst_stack {
+ private:
+ char *m_buffer;
+ size_t m_buffer_size;
+ size_t m_offset;
+ std::stack<std::tuple<size_t, size_t, size_t>> m_stack;
+
+ public:
+ explicit Rdb_sst_stack(size_t max_size)
+ : m_buffer(nullptr), m_buffer_size(max_size) {}
+ ~Rdb_sst_stack() { delete[] m_buffer; }
+
+ void reset() { m_offset = 0; }
+ bool empty() { return m_stack.empty(); }
+ void push(const rocksdb::Slice &key, const rocksdb::Slice &value);
+ std::pair<rocksdb::Slice, rocksdb::Slice> top();
+ void pop() { m_stack.pop(); }
+ size_t size() { return m_stack.size(); }
+ };
+
+ bool m_use_stack;
+ bool m_first;
+ std::string m_first_key;
+ std::string m_first_value;
+ Rdb_sst_stack m_stack;
+ Rdb_sst_file m_file;
+
+ rocksdb::Status apply_first();
+
+ public:
+ Rdb_sst_file_ordered(rocksdb::DB *const db,
+ rocksdb::ColumnFamilyHandle *const cf,
+ const rocksdb::DBOptions &db_options,
+ const std::string &name, const bool tracing,
+ size_t max_size);
+
+ inline rocksdb::Status open() { return m_file.open(); }
rocksdb::Status put(const rocksdb::Slice &key, const rocksdb::Slice &value);
rocksdb::Status commit();
- const std::string get_name() const { return m_name; }
+ inline const std::string get_name() const { return m_file.get_name(); }
};
class Rdb_sst_info {
-private:
+ private:
Rdb_sst_info(const Rdb_sst_info &p) = delete;
Rdb_sst_info &operator=(const Rdb_sst_info &p) = delete;
@@ -70,23 +124,24 @@ private:
uint64_t m_curr_size;
uint64_t m_max_size;
uint32_t m_sst_count;
- std::string m_error_msg;
+ std::atomic<int> m_background_error;
std::string m_prefix;
static std::atomic<uint64_t> m_prefix_counter;
static std::string m_suffix;
#if defined(RDB_SST_INFO_USE_THREAD)
- std::queue<Rdb_sst_file *> m_queue;
+ std::queue<Rdb_sst_file_ordered *> m_queue;
std::mutex m_mutex;
std::condition_variable m_cond;
std::thread *m_thread;
bool m_finished;
#endif
- Rdb_sst_file *m_sst_file;
+ Rdb_sst_file_ordered *m_sst_file;
const bool m_tracing;
int open_new_sst_file();
void close_curr_sst_file();
- void set_error_msg(const std::string &sst_file_name, const std::string &msg);
+ void set_error_msg(const std::string &sst_file_name,
+ const rocksdb::Status &s);
#if defined(RDB_SST_INFO_USE_THREAD)
void run_thread();
@@ -94,7 +149,7 @@ private:
static void thread_fcn(void *object);
#endif
-public:
+ public:
Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
const std::string &indexname,
rocksdb::ColumnFamilyHandle *const cf,
@@ -104,7 +159,22 @@ public:
int put(const rocksdb::Slice &key, const rocksdb::Slice &value);
int commit();
- const std::string &error_message() const { return m_error_msg; }
+ bool have_background_error() { return m_background_error != 0; }
+
+ int get_and_reset_background_error() {
+ int ret = m_background_error;
+ while (!m_background_error.compare_exchange_weak(ret, HA_EXIT_SUCCESS)) {
+ // Do nothing
+ }
+
+ return ret;
+ }
+
+ void set_background_error(int code) {
+ int expected = HA_EXIT_SUCCESS;
+ // Only assign 'code' into the error if it is already 0, otherwise ignore it
+ m_background_error.compare_exchange_strong(expected, code);
+ }
static void init(const rocksdb::DB *const db);
};
diff --git a/storage/rocksdb/rdb_utils.cc b/storage/rocksdb/rdb_utils.cc
index 5dff63cbf4c..335676a6ba4 100644
--- a/storage/rocksdb/rdb_utils.cc
+++ b/storage/rocksdb/rdb_utils.cc
@@ -23,7 +23,7 @@
#include <array>
#include <string>
#include <vector>
-#include <sstream> //psergey-merge
+#include <sstream>
/* C standard header files */
#include <ctype.h>
@@ -303,6 +303,18 @@ bool rdb_database_exists(const std::string &db_name) {
return true;
}
+void rdb_log_status_error(const rocksdb::Status &s, const char *msg) {
+ if (msg == nullptr) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: status error, code: %d, error message: %s",
+ s.code(), s.ToString().c_str());
+ return;
+ }
+
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: %s, Status Code: %d, Status: %s", msg, s.code(),
+ s.ToString().c_str());
+}
/*
@brief
diff --git a/storage/rocksdb/rdb_utils.h b/storage/rocksdb/rdb_utils.h
index 71ec8ef54ab..3feda5d82ad 100644
--- a/storage/rocksdb/rdb_utils.h
+++ b/storage/rocksdb/rdb_utils.h
@@ -29,6 +29,7 @@
/* RocksDB header files */
#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
#ifdef HAVE_JEMALLOC
#include <jemalloc/jemalloc.h>
@@ -253,6 +254,8 @@ inline void rdb_check_mutex_call_result(const char *function_name,
}
}
+void rdb_log_status_error(const rocksdb::Status &s, const char *msg = nullptr);
+
/*
Helper functions to parse strings.
*/
diff --git a/storage/rocksdb/rocksdb b/storage/rocksdb/rocksdb
-Subproject ba4c77bd6b16ea493c555561ed2e59bdc4c15fc
+Subproject 9a970c81af9807071bd690f4c808c5045866291
diff --git a/storage/rocksdb/rocksdb-range-access.txt b/storage/rocksdb/rocksdb-range-access.txt
index c974279ac77..6b5a0db938a 100644
--- a/storage/rocksdb/rocksdb-range-access.txt
+++ b/storage/rocksdb/rocksdb-range-access.txt
@@ -38,39 +38,17 @@ When we need to seek to a tuple that is a prefix of a full key:
( kv )-ccc-pk
( kv )-bbb-pk3
( kv )-bbb-pk2
- ( kv )-bbb-pk1 < -- We need to be here
-# ( kv )-bbb <---we call Seek(kv-bbb)
- ( kv )-aaa-pk ... and end up here. Should call it->Prev().
+ ( kv )-bbb-pk1 <--- SeekForPrev("kv-bbb") will put us here on the previous
+ record.
+# ( kv )-bbb <--- "kv-bbb" doesn't exist in the database, but it would be
+ ( kv )-aaa-pk here.
-There is a special case when (kv)-bbb-pk1 is the last record in the CF, and
-we get invalid iterator. Then, we need to call SeekToLast().
-
-Another kind of special case is when we need to seek to the full value.
-Suppose, the lookup tuple is kv-bbb-pk1:
-
- (kv+1)-xxx-pk
- ( kv )-ccc-pk
- ( kv )-bbb-pk3
- ( kv )-bbb-pk2
- ( kv )-bbb-pk1 < -- Seek(kv-bbb-pk1)
- ( kv )-bbb-pk0
-
-Then, Seek(kv-bbb-pk1) will position us exactly the tuple we need, and we
-won't need to call it->Prev(). If we get an invalid iterator, there is no
-need to call SeekToLast().
+Even when (kv)-bbb-pk1 is the last record in the CF, SeekForPrev() will find the
+last record before "kv-bbb", so it already takes care of this case for us.
RocksDB calls:
- it->Seek(tuple);
-
- if (!using_full_key)
- {
- if (!it->Valid())
- it->SeekToLast();
- else
- it->Prev();
- }
-
+ it->SeekForPrev(kv);
if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
return record.
@@ -90,7 +68,7 @@ If lookup tuple is kv-bbb:
RocksDB calls:
Seek(kv);
- if (it->Valid() && kd->covers_key(..) && kd->cmp_full_keys(...))
+ if (it->Valid() && kd->covers_key(..))
return record.
== HA_READ_KEY_OR_NEXT, backward CF ==
@@ -101,12 +79,13 @@ When specified key tuple is a key prefix:
( kv )-ccc-pk
( kv )-bbb-pk3
( kv )-bbb-pk2
- ( kv )-bbb-pk1 < -- We need to be here (or above)
-# ( kv )-bbb <---we call Seek(kv-bbb)
- ( kv )-aaa-pk ... and end up here. Should call it->Prev().
+ ( kv )-bbb-pk1 <--- Seek("kv-bbb") will put us here on the previous record.
+# ( kv )-bbb <--- "kv-bbb" doesn't exist in the database, but it would be
+ here.
+ ( kv )-aaa-pk
-There is a special case when (kv)-bbb-pk1 is the last record in the CF, and
-we get invalid iterator. Then, we need to call SeekToLast().
+Even when (kv)-bbb-pk1 is the last record in the CF, SeekForPrev() will find the
+last record before "kv-bbb", so it already takes care of this case for us.
Another kind of special case is when we need to seek to the full value.
Suppose, the lookup tuple is kv-bbb-pk1:
@@ -115,28 +94,16 @@ Suppose, the lookup tuple is kv-bbb-pk1:
( kv )-ccc-pk
( kv )-bbb-pk3
( kv )-bbb-pk2
- ( kv )-bbb-pk1 < -- Seek(kv-bbb-pk1)
+ ( kv )-bbb-pk1 < -- SeekForPrev(kv-bbb-pk1)
( kv )-bbb-pk0
-Then, Seek(kv-bbb-pk1) may position us exactly at the tuple we need, and we
-won't need to call it->Prev().
-If kv-bbb-pk1 is not present in the database, we will be positioned on
-kv-bbb-pk0, and we will need to call it->Prev().
-If we get an invalid iterator, we DO need to call SeekToLast().
+Then, SeekForPrev(kv-bbb-pk1) may position us exactly at the tuple we need.
+Even if kv-bbb-pk1 is not present in the database, we will be positioned on
+kv-bbb-pk2 no matter whether kv-bbb-pk2 is the last key or not.
RocksDB calls:
- Seek(...);
-
- if (!it->Valid())
- it->SeekToLast();
- else
- {
- if (!using_full_key ||
- !(kd->covers_key(...) || kd->cmp_full_keys(...))
- it->Prev();
- }
-
+ SeekForPrev(...);
if (it->Valid() && kd->covers_key(..))
return record.
@@ -153,7 +120,7 @@ Suppose lookup_key = kv-bbb
( kv )-bbb-pk3
( kv )-bbb-pk4
( kv )-bbb-pk5
- ( kv )-ccc-pkN <-- That is, we need to be here.
+ ( kv )-ccc-pkN <--- That is, we need to be here.
However, we don't know that the next value is kv-ccc. Instead, we seek to the
first value that strictly greater than 'kv-bbb'. It is Successor(kv-bbb).
@@ -163,7 +130,7 @@ It doesn't matter if we're using a full extended key or not.
RocksDB calls:
Seek(Successor(kv-bbb));
- if (it->Valid() && kd->covers_key(it.key()))
+ if (it->Valid() && kd->covers_key(...))
return record;
Note that the code is the same as with HA_READ_KEY_OR_NEXT, except that
@@ -175,47 +142,25 @@ Suppose, the lookup key is 'kv-bbb':
(kv+1)-xxx-pk
( kv )-ccc-pk7
- ( kv )-ccc-pk6 <-- We need to be here.
-# Successor(kv-bbb) <-- We get here when we call Seek(Successor(kv-bbb))
- ( kv )-bbb-pk5 and we will need to call Prev() (*)
+ ( kv )-ccc-pk6 <-- We get here when we call Seek(Successor(kv-bbb))
+# Successor(kv-bbb)
+ ( kv )-bbb-pk5
( kv )-bbb-pk4
( kv )-bbb-pk3
( kv )-bbb-pk2
( kv )-bbb-pk1
-# ( kv )-bbb <-- We would get here if we called Seek(kv-bbb).
+# ( kv )-bbb <-- We would get here if we called SeekForPrev(kv-bbb).
( kv )-aaa-pk
-(*) - unless Successor(kv-bbb)=(kv-ccc), and Seek(kv-ccc) hits the row. In
-that case, we won't need to call Prev().
-
RocksDB calls:
- Seek(Successor(kv-bbb));
- if (!it->Valid())
- {
- /*
- We may get EOF if rows with 'kv-bbb' (below the Successor... line in the
- diagram) do not exist. This doesn't mean that rows with values kv-ccc
- do not exist.
- */
- it->SeekToLast();
- }
- else
- {
- if (!using_full_key ||
- !kd->value_matches_prefix(...))
- {
- it->Prev();
- }
- }
-
+ SeekForPrev(Successor(kv-bbb));
if (it->Valid() && kd->covers_key(...))
return record.
Note that the code is the same as with HA_READ_KEY_OR_NEXT, except that
we seek to Successor($lookup_key) instead of $lookup_key itself.
-
== HA_READ_BEFORE_KEY, forward CF ==
This is finding max(key) such that key < lookup_tuple.
@@ -224,31 +169,29 @@ Suppose, lookup_tuple=kv-bbb.
( kv )-aaa-pk1
( kv )-aaa-pk2
- ( kv )-aaa-pk3 <-- Need to be here.
+ ( kv )-aaa-pk3 <-- SeekForPrev("kv-bbb") will put us here.
# ( kv )-bbb
- ( kv )-bbb-pk4 <-- Seek("kv-bbb") will put us here.
+ ( kv )-bbb-pk4
( kv )-bbb-pk5
( kv )-bbb-pk6
-1. Seek(kv-bbb) will put us at kv-bbb-pk4 (or return an invalid iterator
- if kv-bbb-pk4 and subsequent rows do not exist in the db).
-2. We will need to call Prev() to get to the record before.
- (if there is no record before kv-bbb, then we can't find a record).
-
-It doesn't matter if we're using a full extended key or not.
+If the lookup tuple is a full key (e.g. kv-bbb-pk3), and the key is present in
+the database, the iterator will be positioned on the key. We will need to call
+Prev() to get the next key.
RocksDB calls:
- it->Seek(kv-bbb);
- if (it->Valid())
+ it->SeekForPrev(kv-bbb);
+ if (it->Valid() && using_full_key &&
+ kd->value_matches_prefix(...))
+ {
+ /* We are using full key and we've hit an exact match */
it->Prev();
- else
- it->SeekToLast();
+ }
if (it->Valid() && kd->covers_key(...))
return record;
-
== HA_READ_BEFORE_KEY, backward CF ==
This is finding max(key) such that key < lookup_tuple.
@@ -269,7 +212,6 @@ Next() to get the next key.
RocksDB calls:
it->Seek(kv-bbb);
-
if (it->Valid() && using_full_key &&
kd->value_matches_prefix(...))
{
@@ -292,19 +234,16 @@ Suppose, lookup_tuple='kv-bbb'
( kv )-bbb-pk4
( kv )-bbb-pk5
( kv )-bbb-pk6
- ( kv )-bbb-pk7 <--- Need to be here.
+ ( kv )-bbb-pk7 <--- SeekForPrev(Successor(kv-bbb)) will get us here
# ( kv )-ccc
- ( kv )-ccc-pk8 <-- Seek(Successor(kv-bbb)) will get us here. will need
- ( kv )-ccc-pk9 to call Prev().
+ ( kv )-ccc-pk8
+ ( kv )-ccc-pk9
RocksDB calls:
- Seek(Successor(kv-bbb));
- if (!it->Valid())
- it->SeekToLast();
- else
+ SeekForPrev(Successor(kv-bbb));
+ if (using_full_key && it->Valid() && !cmp_full_keys(Successor(lookup_key)))
it->Prev();
-
if (it->Valid() && kd->covers_key(...))
{
if (!cmp_full_keys(lookup_tuple)) // not needed in _OR_PREV