diff options
author | Sergei Petrunia <psergey@askmonty.org> | 2017-02-06 17:39:08 +0000 |
---|---|---|
committer | Sergei Petrunia <psergey@askmonty.org> | 2017-02-06 17:39:08 +0000 |
commit | 7468ccfadf27a0c5f87c8909ee8514bdd52dc4a7 (patch) | |
tree | b77bc71584e1fe8d727b04a1577dd612da7b9fdd /storage/rocksdb/rdb_datadic.h | |
parent | cfb59f3196aac1b41cdda79952031dcc64042914 (diff) | |
download | mariadb-git-7468ccfadf27a0c5f87c8909ee8514bdd52dc4a7.tar.gz |
Copy of
commit d1bb19b8f751875472211312c8e810143a7ba4b6
Author: Manuel Ung <mung@fb.com>
Date: Fri Feb 3 11:50:34 2017 -0800
Add cardinality stats to information schema
Summary: This adds cardinality stats to the INFORMATION_SCHEMA.ROCKSDB_INDEX_FILE_MAP table. Cardinality was the only user-collected property from SST files that was not yet exposed; exposing it is useful for debugging cardinality bugs.
Reviewed By: hermanlee
Differential Revision: D4509156
fbshipit-source-id: 2d3918a
Diffstat (limited to 'storage/rocksdb/rdb_datadic.h')
-rw-r--r-- | storage/rocksdb/rdb_datadic.h | 641 |
1 files changed, 288 insertions, 353 deletions
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 91cb9b8d563..b084752f601 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -56,15 +56,13 @@ class Rdb_ddl_manager; In order to accommodate both cases, we require both calls to be made and unpack_info is passed as context data between the two. */ -class Rdb_pack_field_context -{ - public: - Rdb_pack_field_context(const Rdb_pack_field_context&) = delete; - Rdb_pack_field_context& operator=(const Rdb_pack_field_context&) = delete; +class Rdb_pack_field_context { +public: + Rdb_pack_field_context(const Rdb_pack_field_context &) = delete; + Rdb_pack_field_context &operator=(const Rdb_pack_field_context &) = delete; - explicit Rdb_pack_field_context(Rdb_string_writer* const writer_arg) : - writer(writer_arg) - {} + explicit Rdb_pack_field_context(Rdb_string_writer *const writer_arg) + : writer(writer_arg) {} // NULL means we're not producing unpack_info. Rdb_string_writer *writer; @@ -76,51 +74,50 @@ struct Rdb_collation_codec; C-style "virtual table" allowing different handling of packing logic based on the field type. See Rdb_field_packing::setup() implementation. 
*/ -using rdb_make_unpack_info_t= void (*)(const Rdb_collation_codec *codec, - const Field *field, - Rdb_pack_field_context *pack_ctx); -using rdb_index_field_unpack_t= int (*)(Rdb_field_packing *fpi, Field *field, - uchar *field_ptr, - Rdb_string_reader *reader, - Rdb_string_reader *unpack_reader); -using rdb_index_field_skip_t= int (*)(const Rdb_field_packing *fpi, - const Field *field, - Rdb_string_reader *reader); -using rdb_index_field_pack_t= void (*)(Rdb_field_packing *fpi, Field *field, - uchar* buf, uchar **dst, - Rdb_pack_field_context *pack_ctx); - -const uint RDB_INVALID_KEY_LEN= uint(-1); +using rdb_make_unpack_info_t = void (*)(const Rdb_collation_codec *codec, + const Field *field, + Rdb_pack_field_context *pack_ctx); +using rdb_index_field_unpack_t = int (*)(Rdb_field_packing *fpi, Field *field, + uchar *field_ptr, + Rdb_string_reader *reader, + Rdb_string_reader *unpack_reader); +using rdb_index_field_skip_t = int (*)(const Rdb_field_packing *fpi, + const Field *field, + Rdb_string_reader *reader); +using rdb_index_field_pack_t = void (*)(Rdb_field_packing *fpi, Field *field, + uchar *buf, uchar **dst, + Rdb_pack_field_context *pack_ctx); + +const uint RDB_INVALID_KEY_LEN = uint(-1); /* How much one checksum occupies when stored in the record */ -const size_t RDB_CHECKSUM_SIZE= sizeof(uint32_t); +const size_t RDB_CHECKSUM_SIZE = sizeof(uint32_t); /* How much the checksum data occupies in record, in total. It is storing two checksums plus 1 tag-byte. */ -const size_t RDB_CHECKSUM_CHUNK_SIZE= 2 * RDB_CHECKSUM_SIZE + 1; +const size_t RDB_CHECKSUM_CHUNK_SIZE = 2 * RDB_CHECKSUM_SIZE + 1; /* Checksum data starts from CHECKSUM_DATA_TAG which is followed by two CRC32 checksums. */ -const char RDB_CHECKSUM_DATA_TAG= 0x01; +const char RDB_CHECKSUM_DATA_TAG = 0x01; /* Unpack data is variable length. It is a 1 tag-byte plus a two byte length field. The length field includes the header as well. 
*/ -const char RDB_UNPACK_DATA_TAG= 0x02; -const size_t RDB_UNPACK_DATA_LEN_SIZE= sizeof(uint16_t); -const size_t RDB_UNPACK_HEADER_SIZE= sizeof(RDB_UNPACK_DATA_TAG) + - RDB_UNPACK_DATA_LEN_SIZE; +const char RDB_UNPACK_DATA_TAG = 0x02; +const size_t RDB_UNPACK_DATA_LEN_SIZE = sizeof(uint16_t); +const size_t RDB_UNPACK_HEADER_SIZE = + sizeof(RDB_UNPACK_DATA_TAG) + RDB_UNPACK_DATA_LEN_SIZE; // Possible return values for rdb_index_field_unpack_t functions. enum { - UNPACK_SUCCESS= 0, - UNPACK_FAILURE= 1, - UNPACK_INFO_MISSING= 2, + UNPACK_SUCCESS = 0, + UNPACK_FAILURE = 1, }; /* @@ -163,62 +160,55 @@ enum { reads. */ -class Rdb_key_def -{ +class Rdb_key_def { public: /* Convert a key from KeyTupleFormat to mem-comparable form */ - uint pack_index_tuple(TABLE* const tbl, uchar* const pack_buffer, - uchar* const packed_tuple, - const uchar* const key_tuple, + uint pack_index_tuple(TABLE *const tbl, uchar *const pack_buffer, + uchar *const packed_tuple, const uchar *const key_tuple, const key_part_map &keypart_map) const; /* Convert a key from Table->record format to mem-comparable form */ - uint pack_record(const TABLE* const tbl, uchar* const pack_buffer, - const uchar* const record, - uchar* const packed_tuple, - Rdb_string_writer* const unpack_info, + uint pack_record(const TABLE *const tbl, uchar *const pack_buffer, + const uchar *const record, uchar *const packed_tuple, + Rdb_string_writer *const unpack_info, const bool &should_store_row_debug_checksums, - const longlong &hidden_pk_id= 0, uint n_key_parts= 0, - uint* const n_null_fields= nullptr) const; + const longlong &hidden_pk_id = 0, uint n_key_parts = 0, + uint *const n_null_fields = nullptr) const; /* Pack the hidden primary key into mem-comparable form. 
*/ uint pack_hidden_pk(const longlong &hidden_pk_id, - uchar* const packed_tuple) const; - int unpack_record(TABLE* const table, uchar* const buf, - const rocksdb::Slice* const packed_key, - const rocksdb::Slice* const unpack_info, - const bool &verify_row_debug_checksums) - const; - - static bool unpack_info_has_checksum(const rocksdb::Slice& unpack_info); + uchar *const packed_tuple) const; + int unpack_record(TABLE *const table, uchar *const buf, + const rocksdb::Slice *const packed_key, + const rocksdb::Slice *const unpack_info, + const bool &verify_row_debug_checksums) const; + + static bool unpack_info_has_checksum(const rocksdb::Slice &unpack_info); int compare_keys(const rocksdb::Slice *key1, const rocksdb::Slice *key2, - std::size_t* const column_index) const; + std::size_t *const column_index) const; - size_t key_length(const TABLE* const table, const rocksdb::Slice &key) const; + size_t key_length(const TABLE *const table, const rocksdb::Slice &key) const; /* Get the key that is the "infimum" for this index */ - inline void get_infimum_key(uchar* const key, uint* const size) const - { + inline void get_infimum_key(uchar *const key, uint *const size) const { rdb_netbuf_store_index(key, m_index_number); - *size= INDEX_NUMBER_SIZE; + *size = INDEX_NUMBER_SIZE; } /* Get the key that is a "supremum" for this index */ - inline void get_supremum_key(uchar* const key, uint* const size) const - { - rdb_netbuf_store_index(key, m_index_number+1); - *size= INDEX_NUMBER_SIZE; + inline void get_supremum_key(uchar *const key, uint *const size) const { + rdb_netbuf_store_index(key, m_index_number + 1); + *size = INDEX_NUMBER_SIZE; } /* Make a key that is right after the given key. */ - static int successor(uchar* const packed_tuple, const uint &len); + static int successor(uchar *const packed_tuple, const uint &len); /* This can be used to compare prefixes. if X is a prefix of Y, then we consider that X = Y. */ // b describes the lookup key, which can be a prefix of a. 
- int cmp_full_keys(const rocksdb::Slice& a, const rocksdb::Slice& b) const - { + int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const { DBUG_ASSERT(covers_key(a)); DBUG_ASSERT(covers_key(b)); @@ -226,8 +216,7 @@ public: } /* Check if given mem-comparable key belongs to this index */ - bool covers_key(const rocksdb::Slice &slice) const - { + bool covers_key(const rocksdb::Slice &slice) const { if (slice.size() < INDEX_NUMBER_SIZE) return false; @@ -244,43 +233,35 @@ public: form) */ bool value_matches_prefix(const rocksdb::Slice &value, - const rocksdb::Slice &prefix) const - { + const rocksdb::Slice &prefix) const { return covers_key(value) && !cmp_full_keys(value, prefix); } - uint32 get_keyno() const - { - return m_keyno; - } + uint32 get_keyno() const { return m_keyno; } - uint32 get_index_number() const - { - return m_index_number; - } + uint32 get_index_number() const { return m_index_number; } - GL_INDEX_ID get_gl_index_id() const - { - const GL_INDEX_ID gl_index_id = { m_cf_handle->GetID(), m_index_number }; + GL_INDEX_ID get_gl_index_id() const { + const GL_INDEX_ID gl_index_id = {m_cf_handle->GetID(), m_index_number}; return gl_index_id; } + int read_memcmp_key_part(const TABLE *table_arg, Rdb_string_reader *reader, + const uint part_num) const; + /* Must only be called for secondary keys: */ - uint get_primary_key_tuple(const TABLE* const tbl, - const Rdb_key_def& pk_descr, - const rocksdb::Slice* const key, - uchar* const pk_buffer) const; + uint get_primary_key_tuple(const TABLE *const tbl, + const Rdb_key_def &pk_descr, + const rocksdb::Slice *const key, + uchar *const pk_buffer) const; + + uint get_memcmp_sk_parts(const TABLE *table, const rocksdb::Slice &key, + uchar *sk_buffer, uint *n_null_fields) const; /* Return max length of mem-comparable form */ - uint max_storage_fmt_length() const - { - return m_maxlength; - } + uint max_storage_fmt_length() const { return m_maxlength; } - uint get_key_parts() const - { - return 
m_key_parts; - } + uint get_key_parts() const { return m_key_parts; } /* Get a field object for key part #part_no @@ -292,60 +273,60 @@ public: Internally, we always extend all indexes with PK columns. This function uses our definition of how the index is Extended. */ - inline Field* get_table_field_for_part_no(TABLE *table, uint part_no) const; + inline Field *get_table_field_for_part_no(TABLE *table, uint part_no) const; + + const std::string &get_name() const { return m_name; } - const std::string& get_name() const { - return m_name; + const rocksdb::SliceTransform *get_extractor() const { + return m_prefix_extractor.get(); } - Rdb_key_def& operator=(const Rdb_key_def&) = delete; - Rdb_key_def(const Rdb_key_def& k); + Rdb_key_def &operator=(const Rdb_key_def &) = delete; + Rdb_key_def(const Rdb_key_def &k); Rdb_key_def(uint indexnr_arg, uint keyno_arg, - rocksdb::ColumnFamilyHandle* cf_handle_arg, - uint16_t index_dict_version_arg, - uchar index_type_arg, - uint16_t kv_format_version_arg, - bool is_reverse_cf_arg, bool is_auto_cf_arg, - const char* name, - Rdb_index_stats stats= Rdb_index_stats()); + rocksdb::ColumnFamilyHandle *cf_handle_arg, + uint16_t index_dict_version_arg, uchar index_type_arg, + uint16_t kv_format_version_arg, bool is_reverse_cf_arg, + bool is_auto_cf_arg, const char *name, + Rdb_index_stats stats = Rdb_index_stats()); ~Rdb_key_def(); enum { - INDEX_NUMBER_SIZE= 4, - VERSION_SIZE= 2, - CF_NUMBER_SIZE= 4, - CF_FLAG_SIZE= 4, - PACKED_SIZE= 4, // one int + INDEX_NUMBER_SIZE = 4, + VERSION_SIZE = 2, + CF_NUMBER_SIZE = 4, + CF_FLAG_SIZE = 4, + PACKED_SIZE = 4, // one int }; // bit flags for combining bools when writing to disk enum { - REVERSE_CF_FLAG= 1, - AUTO_CF_FLAG= 2, + REVERSE_CF_FLAG = 1, + AUTO_CF_FLAG = 2, }; // Data dictionary types enum DATA_DICT_TYPE { - DDL_ENTRY_INDEX_START_NUMBER= 1, - INDEX_INFO= 2, - CF_DEFINITION= 3, - BINLOG_INFO_INDEX_NUMBER= 4, - DDL_DROP_INDEX_ONGOING= 5, - INDEX_STATISTICS= 6, - MAX_INDEX_ID= 7, - 
DDL_CREATE_INDEX_ONGOING= 8, - END_DICT_INDEX_ID= 255 + DDL_ENTRY_INDEX_START_NUMBER = 1, + INDEX_INFO = 2, + CF_DEFINITION = 3, + BINLOG_INFO_INDEX_NUMBER = 4, + DDL_DROP_INDEX_ONGOING = 5, + INDEX_STATISTICS = 6, + MAX_INDEX_ID = 7, + DDL_CREATE_INDEX_ONGOING = 8, + END_DICT_INDEX_ID = 255 }; // Data dictionary schema version. Introduce newer versions // if changing schema layout enum { - DDL_ENTRY_INDEX_VERSION= 1, - CF_DEFINITION_VERSION= 1, - BINLOG_INFO_INDEX_NUMBER_VERSION= 1, - DDL_DROP_INDEX_ONGOING_VERSION= 1, - MAX_INDEX_ID_VERSION= 1, - DDL_CREATE_INDEX_ONGOING_VERSION= 1, + DDL_ENTRY_INDEX_VERSION = 1, + CF_DEFINITION_VERSION = 1, + BINLOG_INFO_INDEX_NUMBER_VERSION = 1, + DDL_DROP_INDEX_ONGOING_VERSION = 1, + MAX_INDEX_ID_VERSION = 1, + DDL_CREATE_INDEX_ONGOING_VERSION = 1, // Version for index stats is stored in IndexStats struct }; @@ -353,7 +334,7 @@ public: // INDEX_INFO layout. Update INDEX_INFO_VERSION_LATEST to point to the // latest version number. enum { - INDEX_INFO_VERSION_INITIAL= 1, // Obsolete + INDEX_INFO_VERSION_INITIAL = 1, // Obsolete INDEX_INFO_VERSION_KV_FORMAT, INDEX_INFO_VERSION_GLOBAL_ID, // There is no change to data format in this version, but this version @@ -362,35 +343,35 @@ public: // check inadvertently. INDEX_INFO_VERSION_VERIFY_KV_FORMAT, // This normally point to the latest (currently it does). - INDEX_INFO_VERSION_LATEST= INDEX_INFO_VERSION_VERIFY_KV_FORMAT, + INDEX_INFO_VERSION_LATEST = INDEX_INFO_VERSION_VERIFY_KV_FORMAT, }; // MyRocks index types enum { - INDEX_TYPE_PRIMARY= 1, - INDEX_TYPE_SECONDARY= 2, - INDEX_TYPE_HIDDEN_PRIMARY= 3, + INDEX_TYPE_PRIMARY = 1, + INDEX_TYPE_SECONDARY = 2, + INDEX_TYPE_HIDDEN_PRIMARY = 3, }; // Key/Value format version for each index type enum { - PRIMARY_FORMAT_VERSION_INITIAL= 10, + PRIMARY_FORMAT_VERSION_INITIAL = 10, // This change includes: // - For columns that can be unpacked with unpack_info, PK // stores the unpack_info. 
// - DECIMAL datatype is no longer stored in the row (because // it can be decoded from its mem-comparable form) // - VARCHAR-columns use endspace-padding. - PRIMARY_FORMAT_VERSION_UPDATE1= 11, - PRIMARY_FORMAT_VERSION_LATEST= PRIMARY_FORMAT_VERSION_UPDATE1, + PRIMARY_FORMAT_VERSION_UPDATE1 = 11, + PRIMARY_FORMAT_VERSION_LATEST = PRIMARY_FORMAT_VERSION_UPDATE1, - SECONDARY_FORMAT_VERSION_INITIAL= 10, + SECONDARY_FORMAT_VERSION_INITIAL = 10, // This change the SK format to include unpack_info. - SECONDARY_FORMAT_VERSION_UPDATE1= 11, - SECONDARY_FORMAT_VERSION_LATEST= SECONDARY_FORMAT_VERSION_UPDATE1, + SECONDARY_FORMAT_VERSION_UPDATE1 = 11, + SECONDARY_FORMAT_VERSION_LATEST = SECONDARY_FORMAT_VERSION_UPDATE1, }; - void setup(const TABLE* const table, const Rdb_tbl_def* const tbl_def); + void setup(const TABLE *const table, const Rdb_tbl_def *const tbl_def); rocksdb::ColumnFamilyHandle *get_cf() const { return m_cf_handle; } @@ -400,9 +381,9 @@ public: inline bool has_unpack_info(const uint &kp) const; /* Check if given table has a primary key */ - static bool table_has_hidden_pk(const TABLE* const table); + static bool table_has_hidden_pk(const TABLE *const table); - void report_checksum_mismatch(const bool &is_key, const char* const data, + void report_checksum_mismatch(const bool &is_key, const char *const data, const size_t data_size) const; /* Check if index is at least pk_min if it is a PK, @@ -410,21 +391,19 @@ public: bool index_format_min_check(const int &pk_min, const int &sk_min) const; private: - #ifndef DBUG_OFF - inline bool is_storage_available(const int &offset, const int &needed) const - { - const int storage_length= static_cast<int>(max_storage_fmt_length()); + inline bool is_storage_available(const int &offset, const int &needed) const { + const int storage_length = static_cast<int>(max_storage_fmt_length()); return (storage_length - offset) >= needed; } -#endif // DBUG_OFF +#endif // DBUG_OFF /* Global number of this index (used as prefix in 
StorageFormat) */ const uint32 m_index_number; uchar m_index_number_storage_form[INDEX_NUMBER_SIZE]; - rocksdb::ColumnFamilyHandle* m_cf_handle; + rocksdb::ColumnFamilyHandle *m_cf_handle; public: uint16_t m_index_dict_version; @@ -437,9 +416,9 @@ public: bool m_is_auto_cf; std::string m_name; mutable Rdb_index_stats m_stats; -private: - friend class Rdb_tbl_def; // for m_index_number above +private: + friend class Rdb_tbl_def; // for m_index_number above /* Number of key parts in the primary key*/ uint m_pk_key_parts; @@ -461,6 +440,9 @@ private: */ uint m_key_parts; + /* Prefix extractor for the column family of the key definiton */ + std::shared_ptr<const rocksdb::SliceTransform> m_prefix_extractor; + /* Maximum length of the mem-comparable form. */ uint m_maxlength; @@ -485,8 +467,7 @@ private: // // We have m_dec_idx[idx][dst] = src to get our original character back. // -struct Rdb_collation_codec -{ +struct Rdb_collation_codec { const my_core::CHARSET_INFO *m_cs; // The first element unpacks VARCHAR(n), the second one - CHAR(n). 
std::array<rdb_make_unpack_info_t, 2> m_make_unpack_info_func; @@ -501,15 +482,13 @@ struct Rdb_collation_codec extern mysql_mutex_t rdb_collation_data_mutex; extern mysql_mutex_t rdb_mem_cmp_space_mutex; -extern std::array<const Rdb_collation_codec*, MY_ALL_CHARSETS_SIZE> - rdb_collation_data; - +extern std::array<const Rdb_collation_codec *, MY_ALL_CHARSETS_SIZE> + rdb_collation_data; -class Rdb_field_packing -{ +class Rdb_field_packing { public: - Rdb_field_packing(const Rdb_field_packing&) = delete; - Rdb_field_packing& operator=(const Rdb_field_packing&) = delete; + Rdb_field_packing(const Rdb_field_packing &) = delete; + Rdb_field_packing &operator=(const Rdb_field_packing &) = delete; Rdb_field_packing() = default; /* Length of mem-comparable image of the field, in bytes */ @@ -527,25 +506,22 @@ public: const CHARSET_INFO *m_varchar_charset; // (Valid when Variable Length Space Padded Encoding is used): - uint m_segment_size; // size of segment used + uint m_segment_size; // size of segment used // number of bytes used to store number of trimmed (or added) // spaces in the upack_info bool m_unpack_info_uses_two_bytes; - const std::vector<uchar>* space_xfrm; + const std::vector<uchar> *space_xfrm; size_t space_xfrm_len; size_t space_mb_len; - const Rdb_collation_codec* m_charset_codec; + const Rdb_collation_codec *m_charset_codec; /* @return TRUE: this field makes use of unpack_info. 
*/ - bool uses_unpack_info() const - { - return (m_make_unpack_info_func != nullptr); - } + bool uses_unpack_info() const { return (m_make_unpack_info_func != nullptr); } /* TRUE means unpack_info stores the original field value */ bool m_unpack_info_stores_value; @@ -591,11 +567,12 @@ private: */ uint m_keynr; uint m_key_part; + public: - bool setup(const Rdb_key_def* const key_descr, const Field* const field, + bool setup(const Rdb_key_def *const key_descr, const Field *const field, const uint &keynr_arg, const uint &key_part_arg, const uint16 &key_length); - Field *get_field_in_table(const TABLE* const tbl) const; + Field *get_field_in_table(const TABLE *const tbl) const; void fill_hidden_pk_val(uchar **dst, const longlong &hidden_pk_id) const; }; @@ -606,11 +583,10 @@ public: For encoding/decoding of index tuples, see Rdb_key_def. */ -class Rdb_field_encoder -{ - public: - Rdb_field_encoder(const Rdb_field_encoder&) = delete; - Rdb_field_encoder& operator=(const Rdb_field_encoder&) = delete; +class Rdb_field_encoder { +public: + Rdb_field_encoder(const Rdb_field_encoder &) = delete; + Rdb_field_encoder &operator=(const Rdb_field_encoder &) = delete; /* STORE_NONE is set when a column can be decoded solely from their mem-comparable form. 
@@ -629,7 +605,7 @@ class Rdb_field_encoder uint m_null_offset; uint16 m_field_index; - uchar m_null_mask; // 0 means the field cannot be null + uchar m_null_mask; // 0 means the field cannot be null my_core::enum_field_types m_field_type; @@ -637,33 +613,28 @@ class Rdb_field_encoder bool maybe_null() const { return m_null_mask != 0; } - bool uses_variable_len_encoding() const - { + bool uses_variable_len_encoding() const { return (m_field_type == MYSQL_TYPE_BLOB || m_field_type == MYSQL_TYPE_VARCHAR); } }; -inline Field* Rdb_key_def::get_table_field_for_part_no(TABLE *table, - uint part_no) const -{ +inline Field *Rdb_key_def::get_table_field_for_part_no(TABLE *table, + uint part_no) const { DBUG_ASSERT(part_no < get_key_parts()); return m_pack_info[part_no].get_field_in_table(table); } -inline bool Rdb_key_def::can_unpack(const uint &kp) const -{ +inline bool Rdb_key_def::can_unpack(const uint &kp) const { DBUG_ASSERT(kp < m_key_parts); return (m_pack_info[kp].m_unpack_func != nullptr); } -inline bool Rdb_key_def::has_unpack_info(const uint &kp) const -{ +inline bool Rdb_key_def::has_unpack_info(const uint &kp) const { DBUG_ASSERT(kp < m_key_parts); return m_pack_info[kp].uses_unpack_info(); } - /* A table definition. This is an entry in the mapping @@ -673,9 +644,8 @@ inline bool Rdb_key_def::has_unpack_info(const uint &kp) const That's why we keep auto_increment value here, too. 
*/ -class Rdb_tbl_def -{ - private: +class Rdb_tbl_def { +private: void check_if_is_mysql_system_table(); /* Stores 'dbname.tablename' */ @@ -686,27 +656,24 @@ class Rdb_tbl_def std::string m_tablename; std::string m_partition; - void set_name(const std::string& name); + void set_name(const std::string &name); - public: - Rdb_tbl_def(const Rdb_tbl_def&) = delete; - Rdb_tbl_def& operator=(const Rdb_tbl_def&) = delete; +public: + Rdb_tbl_def(const Rdb_tbl_def &) = delete; + Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete; - explicit Rdb_tbl_def(const std::string& name) : - m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) - { + explicit Rdb_tbl_def(const std::string &name) + : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { set_name(name); } - Rdb_tbl_def(const char* const name, const size_t &len) : - m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) - { + Rdb_tbl_def(const char *const name, const size_t &len) + : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { set_name(std::string(name, len)); } - explicit Rdb_tbl_def(const rocksdb::Slice& slice, const size_t &pos= 0) : - m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) - { + explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0) + : m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) { set_name(std::string(slice.data() + pos, slice.size() - pos)); } @@ -716,7 +683,7 @@ class Rdb_tbl_def uint m_key_count; /* Array of index descriptors */ - std::shared_ptr<Rdb_key_def>* m_key_descr_arr; + std::shared_ptr<Rdb_key_def> *m_key_descr_arr; std::atomic<longlong> m_hidden_pk_val; std::atomic<longlong> m_auto_incr_val; @@ -724,52 +691,44 @@ class Rdb_tbl_def /* Is this a system table */ bool m_is_mysql_system_table; - bool put_dict(Rdb_dict_manager* const dict, rocksdb::WriteBatch* const batch, - uchar* const key, const size_t &keylen); + bool put_dict(Rdb_dict_manager *const dict, rocksdb::WriteBatch 
*const batch, + uchar *const key, const size_t &keylen); - const std::string& full_tablename() const { return m_dbname_tablename; } - const std::string& base_dbname() const { return m_dbname; } - const std::string& base_tablename() const { return m_tablename; } - const std::string& base_partition() const { return m_partition; } + const std::string &full_tablename() const { return m_dbname_tablename; } + const std::string &base_dbname() const { return m_dbname; } + const std::string &base_tablename() const { return m_tablename; } + const std::string &base_partition() const { return m_partition; } }; - /* A thread-safe sequential number generator. Its performance is not a concern hence it is ok to protect it by a mutex. */ -class Rdb_seq_generator -{ - uint m_next_number= 0; +class Rdb_seq_generator { + uint m_next_number = 0; mysql_mutex_t m_mutex; + public: - Rdb_seq_generator(const Rdb_seq_generator&) = delete; - Rdb_seq_generator& operator=(const Rdb_seq_generator&) = delete; + Rdb_seq_generator(const Rdb_seq_generator &) = delete; + Rdb_seq_generator &operator=(const Rdb_seq_generator &) = delete; Rdb_seq_generator() = default; - void init(const uint &initial_number) - { - mysql_mutex_init(0 , &m_mutex, MY_MUTEX_INIT_FAST); - m_next_number= initial_number; + void init(const uint &initial_number) { + mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); + m_next_number = initial_number; } - uint get_and_update_next_number(Rdb_dict_manager* const dict); + uint get_and_update_next_number(Rdb_dict_manager *const dict); - void cleanup() - { - mysql_mutex_destroy(&m_mutex); - } + void cleanup() { mysql_mutex_destroy(&m_mutex); } }; - -interface Rdb_tables_scanner -{ - virtual int add_table(Rdb_tbl_def* tdef) =0; +interface Rdb_tables_scanner { + virtual int add_table(Rdb_tbl_def * tdef) = 0; }; - /* This contains a mapping of @@ -778,10 +737,9 @@ interface Rdb_tables_scanner objects are shared among all threads. 
*/ -class Rdb_ddl_manager -{ - Rdb_dict_manager *m_dict= nullptr; - my_core::HASH m_ddl_hash; // Contains Rdb_tbl_def elements +class Rdb_ddl_manager { + Rdb_dict_manager *m_dict = nullptr; + my_core::HASH m_ddl_hash; // Contains Rdb_tbl_def elements // maps index id to <table_name, index number> std::map<GL_INDEX_ID, std::pair<std::string, uint>> m_index_num_to_keydef; mysql_rwlock_t m_rwlock; @@ -792,58 +750,56 @@ class Rdb_ddl_manager // and consumed by the rocksdb background thread std::map<GL_INDEX_ID, Rdb_index_stats> m_stats2store; - const std::shared_ptr<Rdb_key_def>& find( - GL_INDEX_ID gl_index_id); + const std::shared_ptr<Rdb_key_def> &find(GL_INDEX_ID gl_index_id); + public: - Rdb_ddl_manager(const Rdb_ddl_manager&) = delete; - Rdb_ddl_manager& operator=(const Rdb_ddl_manager&) = delete; + Rdb_ddl_manager(const Rdb_ddl_manager &) = delete; + Rdb_ddl_manager &operator=(const Rdb_ddl_manager &) = delete; Rdb_ddl_manager() {} /* Load the data dictionary from on-disk storage */ - bool init(Rdb_dict_manager* const dict_arg, Rdb_cf_manager* const cf_manager, + bool init(Rdb_dict_manager *const dict_arg, Rdb_cf_manager *const cf_manager, const uint32_t &validate_tables); void cleanup(); - Rdb_tbl_def* find(const std::string& table_name, const bool &lock= true); + Rdb_tbl_def *find(const std::string &table_name, const bool &lock = true); std::shared_ptr<const Rdb_key_def> safe_find(GL_INDEX_ID gl_index_id); - void set_stats( - const std::unordered_map<GL_INDEX_ID, Rdb_index_stats>& stats); - void adjust_stats( - const std::vector<Rdb_index_stats>& new_data, - const std::vector<Rdb_index_stats>& deleted_data - =std::vector<Rdb_index_stats>()); + void set_stats(const std::unordered_map<GL_INDEX_ID, Rdb_index_stats> &stats); + void adjust_stats(const std::vector<Rdb_index_stats> &new_data, + const std::vector<Rdb_index_stats> &deleted_data = + std::vector<Rdb_index_stats>()); void persist_stats(const bool &sync = false); /* Modify the mapping and write it to 
on-disk storage */ - int put_and_write(Rdb_tbl_def* const key_descr, - rocksdb::WriteBatch* const batch); - void remove(Rdb_tbl_def* const rec, rocksdb::WriteBatch* const batch, - const bool &lock= true); - bool rename(const std::string& from, const std::string& to, - rocksdb::WriteBatch* const batch); - - uint get_and_update_next_number(Rdb_dict_manager* const dict) - { return m_sequence.get_and_update_next_number(dict); } + int put_and_write(Rdb_tbl_def *const key_descr, + rocksdb::WriteBatch *const batch); + void remove(Rdb_tbl_def *const rec, rocksdb::WriteBatch *const batch, + const bool &lock = true); + bool rename(const std::string &from, const std::string &to, + rocksdb::WriteBatch *const batch); + + uint get_and_update_next_number(Rdb_dict_manager *const dict) { + return m_sequence.get_and_update_next_number(dict); + } /* Walk the data dictionary */ - int scan_for_tables(Rdb_tables_scanner* tables_scanner); + int scan_for_tables(Rdb_tables_scanner *tables_scanner); void erase_index_num(const GL_INDEX_ID &gl_index_id); private: /* Put the data into in-memory table (only) */ - int put(Rdb_tbl_def* const key_descr, const bool &lock= true); + int put(Rdb_tbl_def *const key_descr, const bool &lock = true); /* Helper functions to be passed to my_core::HASH object */ - static const uchar* get_hash_key(Rdb_tbl_def* const rec, size_t* const length, - my_bool not_used __attribute__((unused))); - static void free_hash_elem(void* const data); + static const uchar *get_hash_key(Rdb_tbl_def *const rec, size_t *const length, + my_bool not_used MY_ATTRIBUTE((unused))); + static void free_hash_elem(void *const data); bool validate_schemas(); }; - /* Writing binlog information into RocksDB at commit(), and retrieving binlog information at crash recovery. 
@@ -859,40 +815,37 @@ private: binlog_gtid_length (2 byte form) binlog_gtid */ -class Rdb_binlog_manager -{ +class Rdb_binlog_manager { public: - Rdb_binlog_manager(const Rdb_binlog_manager&) = delete; - Rdb_binlog_manager& operator=(const Rdb_binlog_manager&) = delete; + Rdb_binlog_manager(const Rdb_binlog_manager &) = delete; + Rdb_binlog_manager &operator=(const Rdb_binlog_manager &) = delete; Rdb_binlog_manager() = default; - bool init(Rdb_dict_manager* const dict); + bool init(Rdb_dict_manager *const dict); void cleanup(); - void update(const char* const binlog_name, const my_off_t binlog_pos, - const char* const binlog_max_gtid, - rocksdb::WriteBatchBase* const batch); - bool read(char* const binlog_name, my_off_t* const binlog_pos, - char* const binlog_gtid) const; - void update_slave_gtid_info(const uint &id, const char* const db, - const char* const gtid, - rocksdb::WriteBatchBase* const write_batch); + void update(const char *const binlog_name, const my_off_t binlog_pos, + const char *const binlog_max_gtid, + rocksdb::WriteBatchBase *const batch); + bool read(char *const binlog_name, my_off_t *const binlog_pos, + char *const binlog_gtid) const; + void update_slave_gtid_info(const uint &id, const char *const db, + const char *const gtid, + rocksdb::WriteBatchBase *const write_batch); private: - Rdb_dict_manager *m_dict= nullptr; - uchar m_key_buf[Rdb_key_def::INDEX_NUMBER_SIZE]= {0}; + Rdb_dict_manager *m_dict = nullptr; + uchar m_key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; rocksdb::Slice m_key_slice; - rocksdb::Slice pack_value(uchar* const buf, - const char* const binlog_name, + rocksdb::Slice pack_value(uchar *const buf, const char *const binlog_name, const my_off_t &binlog_pos, - const char* const binlog_gtid) const; - bool unpack_value(const uchar* const value, char* const binlog_name, - my_off_t* const binlog_pos, char* const binlog_gtid) const; + const char *const binlog_gtid) const; + bool unpack_value(const uchar *const value, char *const 
binlog_name, + my_off_t *const binlog_pos, char *const binlog_gtid) const; - std::atomic<Rdb_tbl_def*> m_slave_gtid_info_tbl; + std::atomic<Rdb_tbl_def *> m_slave_gtid_info_tbl; }; - /* Rdb_dict_manager manages how MySQL on RocksDB (MyRocks) stores its internal data dictionary. @@ -944,58 +897,49 @@ private: begin() and commit() to make it easier to do atomic operations. */ -class Rdb_dict_manager -{ +class Rdb_dict_manager { private: mysql_mutex_t m_mutex; - rocksdb::DB *m_db= nullptr; - rocksdb::ColumnFamilyHandle *m_system_cfh= nullptr; + rocksdb::DB *m_db = nullptr; + rocksdb::ColumnFamilyHandle *m_system_cfh = nullptr; /* Utility to put INDEX_INFO and CF_DEFINITION */ - uchar m_key_buf_max_index_id[Rdb_key_def::INDEX_NUMBER_SIZE]= {0}; + uchar m_key_buf_max_index_id[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; rocksdb::Slice m_key_slice_max_index_id; - static void dump_index_id(uchar* const netbuf, + static void dump_index_id(uchar *const netbuf, Rdb_key_def::DATA_DICT_TYPE dict_type, const GL_INDEX_ID &gl_index_id); - void delete_with_prefix(rocksdb::WriteBatch* const batch, + void delete_with_prefix(rocksdb::WriteBatch *const batch, Rdb_key_def::DATA_DICT_TYPE dict_type, const GL_INDEX_ID &gl_index_id) const; /* Functions for fast DROP TABLE/INDEX */ void resume_drop_indexes() const; - void log_start_drop_table(const std::shared_ptr<Rdb_key_def>* const key_descr, + void log_start_drop_table(const std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, - const char* const log_action) const; + const char *const log_action) const; void log_start_drop_index(GL_INDEX_ID gl_index_id, - const char* log_action) const; + const char *log_action) const; + public: - Rdb_dict_manager(const Rdb_dict_manager&) = delete; - Rdb_dict_manager& operator=(const Rdb_dict_manager&) = delete; + Rdb_dict_manager(const Rdb_dict_manager &) = delete; + Rdb_dict_manager &operator=(const Rdb_dict_manager &) = delete; Rdb_dict_manager() = default; - bool init(rocksdb::DB* const 
rdb_dict, Rdb_cf_manager* const cf_manager); + bool init(rocksdb::DB *const rdb_dict, Rdb_cf_manager *const cf_manager); - inline void cleanup() - { - mysql_mutex_destroy(&m_mutex); - } + inline void cleanup() { mysql_mutex_destroy(&m_mutex); } - inline void lock() - { - mysql_mutex_lock(&m_mutex); - } + inline void lock() { mysql_mutex_lock(&m_mutex); } - inline void unlock() - { - mysql_mutex_unlock(&m_mutex); - } + inline void unlock() { mysql_mutex_unlock(&m_mutex); } /* Raw RocksDB operations */ std::unique_ptr<rocksdb::WriteBatch> begin() const; - int commit(rocksdb::WriteBatch* const batch, const bool &sync = true) const; - rocksdb::Status get_value(const rocksdb::Slice& key, - std::string* const value) const; - void put_key(rocksdb::WriteBatchBase* const batch, const rocksdb::Slice &key, + int commit(rocksdb::WriteBatch *const batch, const bool &sync = true) const; + rocksdb::Status get_value(const rocksdb::Slice &key, + std::string *const value) const; + void put_key(rocksdb::WriteBatchBase *const batch, const rocksdb::Slice &key, const rocksdb::Slice &value) const; void delete_key(rocksdb::WriteBatchBase *batch, const rocksdb::Slice &key) const; @@ -1007,95 +951,86 @@ public: const uint16_t kv_version, const uint index_id, const uint cf_id) const; - void delete_index_info(rocksdb::WriteBatch* batch, + void delete_index_info(rocksdb::WriteBatch *batch, const GL_INDEX_ID &index_id) const; bool get_index_info(const GL_INDEX_ID &gl_index_id, - uint16_t *index_dict_version, - uchar *index_type, uint16_t *kv_version) const; + uint16_t *index_dict_version, uchar *index_type, + uint16_t *kv_version) const; /* CF id => CF flags */ - void add_cf_flags(rocksdb::WriteBatch* const batch, - const uint &cf_id, + void add_cf_flags(rocksdb::WriteBatch *const batch, const uint &cf_id, const uint &cf_flags) const; - bool get_cf_flags(const uint &cf_id, uint* const cf_flags) const; + bool get_cf_flags(const uint &cf_id, uint *const cf_flags) const; /* Functions for fast 
CREATE/DROP TABLE/INDEX */ - void get_ongoing_index_operation(std::vector<GL_INDEX_ID>* gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const; - bool is_index_operation_ongoing(const GL_INDEX_ID& gl_index_id, + void + get_ongoing_index_operation(std::unordered_set<GL_INDEX_ID> *gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const; + bool is_index_operation_ongoing(const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const; - void start_ongoing_index_operation(rocksdb::WriteBatch* batch, - const GL_INDEX_ID& gl_index_id, + void start_ongoing_index_operation(rocksdb::WriteBatch *batch, + const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const; - void end_ongoing_index_operation(rocksdb::WriteBatch* const batch, - const GL_INDEX_ID& gl_index_id, + void end_ongoing_index_operation(rocksdb::WriteBatch *const batch, + const GL_INDEX_ID &gl_index_id, Rdb_key_def::DATA_DICT_TYPE dd_type) const; bool is_drop_index_empty() const; - void add_drop_table(std::shared_ptr<Rdb_key_def>* const key_descr, + void add_drop_table(std::shared_ptr<Rdb_key_def> *const key_descr, const uint32 &n_keys, - rocksdb::WriteBatch* const batch) const; - void add_drop_index(const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - rocksdb::WriteBatch* const batch) const; - void add_create_index(const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - rocksdb::WriteBatch* const batch) const; - void finish_indexes_operation( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids, - Rdb_key_def::DATA_DICT_TYPE dd_type) const; + rocksdb::WriteBatch *const batch) const; + void add_drop_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + rocksdb::WriteBatch *const batch) const; + void add_create_index(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + rocksdb::WriteBatch *const batch) const; + void + finish_indexes_operation(const std::unordered_set<GL_INDEX_ID> &gl_index_ids, + Rdb_key_def::DATA_DICT_TYPE dd_type) const; void 
rollback_ongoing_index_creation() const; - inline void - get_ongoing_drop_indexes(std::vector<GL_INDEX_ID>* gl_index_ids) const - { + inline void get_ongoing_drop_indexes( + std::unordered_set<GL_INDEX_ID> *gl_index_ids) const { get_ongoing_index_operation(gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } - inline void - get_ongoing_create_indexes(std::vector<GL_INDEX_ID>* gl_index_ids) const - { + inline void get_ongoing_create_indexes( + std::unordered_set<GL_INDEX_ID> *gl_index_ids) const { get_ongoing_index_operation(gl_index_ids, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } inline void start_drop_index(rocksdb::WriteBatch *wb, - const GL_INDEX_ID& gl_index_id) const - { + const GL_INDEX_ID &gl_index_id) const { start_ongoing_index_operation(wb, gl_index_id, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } inline void start_create_index(rocksdb::WriteBatch *wb, - const GL_INDEX_ID& gl_index_id) const - { + const GL_INDEX_ID &gl_index_id) const { start_ongoing_index_operation(wb, gl_index_id, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } inline void finish_drop_indexes( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids) const - { - finish_indexes_operation(gl_index_ids, - Rdb_key_def::DDL_DROP_INDEX_ONGOING); + const std::unordered_set<GL_INDEX_ID> &gl_index_ids) const { + finish_indexes_operation(gl_index_ids, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } inline void finish_create_indexes( - const std::unordered_set<GL_INDEX_ID>& gl_index_ids) const - { + const std::unordered_set<GL_INDEX_ID> &gl_index_ids) const { finish_indexes_operation(gl_index_ids, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } - inline bool is_drop_index_ongoing(const GL_INDEX_ID& gl_index_id) const - { + inline bool is_drop_index_ongoing(const GL_INDEX_ID &gl_index_id) const { return is_index_operation_ongoing(gl_index_id, Rdb_key_def::DDL_DROP_INDEX_ONGOING); } - inline bool is_create_index_ongoing(const GL_INDEX_ID& gl_index_id) const - { + inline bool is_create_index_ongoing(const GL_INDEX_ID 
&gl_index_id) const { return is_index_operation_ongoing(gl_index_id, Rdb_key_def::DDL_CREATE_INDEX_ONGOING); } - bool get_max_index_id(uint32_t* const index_id) const; - bool update_max_index_id(rocksdb::WriteBatch* const batch, + bool get_max_index_id(uint32_t *const index_id) const; + bool update_max_index_id(rocksdb::WriteBatch *const batch, const uint32_t &index_id) const; - void add_stats(rocksdb::WriteBatch* const batch, - const std::vector<Rdb_index_stats>& stats) const; + void add_stats(rocksdb::WriteBatch *const batch, + const std::vector<Rdb_index_stats> &stats) const; Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const; }; -} // namespace myrocks +} // namespace myrocks |