diff options
author | Aleksey Midenkov <midenok@gmail.com> | 2017-11-07 00:37:49 +0300 |
---|---|---|
committer | Aleksey Midenkov <midenok@gmail.com> | 2017-11-07 00:37:49 +0300 |
commit | d8d725101992d50b00edf149e7ccd7f720b850dc (patch) | |
tree | 5797a3c9af2d2db487ec77093608f89b7dabe62f /sql | |
parent | 835cbbcc7b797188a89671019f2b2844e1a14e0c (diff) | |
parent | ce66d5b2a53d76d286e8443807c4ebd7743cc354 (diff) | |
download | mariadb-git-d8d725101992d50b00edf149e7ccd7f720b850dc.tar.gz |
System Versioning pre0.12
Merge remote-tracking branch 'origin/archive/2017-10-17' into 10.3
Diffstat (limited to 'sql')
80 files changed, 7452 insertions, 344 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 8a03692598d..270c0d82993 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -141,6 +141,7 @@ SET (SQL_SOURCE sql_type.cc item_windowfunc.cc sql_window.cc sql_cte.cc + item_vers.cc sql_sequence.cc sql_sequence.h ha_sequence.h ${WSREP_SOURCES} table_cache.cc encryption.cc temporary_tables.cc @@ -148,6 +149,7 @@ SET (SQL_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc ${GEN_SOURCES} ${MYSYS_LIBWRAP_SOURCE} + vtmd.cc ) IF (CMAKE_SYSTEM_NAME MATCHES "Linux" OR diff --git a/sql/field.cc b/sql/field.cc index 51562dd4198..6f7958f02b8 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1987,6 +1987,41 @@ bool Field_num::get_date(MYSQL_TIME *ltime,ulonglong fuzzydate) } +bool Field_vers_trx_id::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate, ulonglong trx_id) +{ + ASSERT_COLUMN_MARKED_FOR_READ; + DBUG_ASSERT(ltime); + if (!table || !table->s) + return true; + DBUG_ASSERT(table->versioned_by_engine() || + (table->versioned() && table->s->table_category == TABLE_CATEGORY_TEMPORARY)); + if (!trx_id) + return true; + if (trx_id == ULONGLONG_MAX) + { + get_thd()->variables.time_zone->gmt_sec_to_TIME(ltime, TIMESTAMP_MAX_VALUE); + ltime->second_part= TIME_MAX_SECOND_PART; + return false; + } + if (cached == trx_id) + { + *ltime= cache; + return false; + } + handlerton *hton= table->file->partition_ht(); + DBUG_ASSERT(hton); + DBUG_ASSERT(hton->vers_query_trx_id); + bool found= hton->vers_query_trx_id(get_thd(), &cache, trx_id, VTQ_COMMIT_TS); + if (found) + { + *ltime= cache; + cached= trx_id; + return false; + } + return true; +} + + Field_str::Field_str(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const LEX_CSTRING *field_name_arg, @@ -4295,6 +4330,26 @@ void Field_longlong::sql_type(String &res) const add_zerofill_and_unsigned(res); } +void Field_longlong::set_max() +{ + ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; + set_notnull(); + int8store(ptr, unsigned_flag 
? ULONGLONG_MAX : LONGLONG_MAX); +} + +bool Field_longlong::is_max() +{ + ASSERT_COLUMN_MARKED_FOR_READ; + if (unsigned_flag) + { + ulonglong j; + j= uint8korr(ptr); + return j == ULONGLONG_MAX; + } + longlong j; + j= sint8korr(ptr); + return j == LONGLONG_MAX; +} /* Floating-point numbers @@ -5307,13 +5362,41 @@ void Field_timestampf::store_TIME(my_time_t timestamp, ulong sec_part) my_timestamp_to_binary(&tm, ptr, dec); } +void Field_timestampf::set_max() +{ + DBUG_ENTER("Field_timestampf::set_max"); + ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; + DBUG_ASSERT(dec == TIME_SECOND_PART_DIGITS); + + set_notnull(); + mi_int4store(ptr, TIMESTAMP_MAX_VALUE); + mi_int3store(ptr + 4, TIME_MAX_SECOND_PART); + + DBUG_VOID_RETURN; +} + +bool Field_timestampf::is_max() +{ + DBUG_ENTER("Field_timestampf::is_max"); + ASSERT_COLUMN_MARKED_FOR_READ; + + DBUG_RETURN(mi_sint4korr(ptr) == TIMESTAMP_MAX_VALUE && + mi_sint3korr(ptr + 4) == TIME_MAX_SECOND_PART); +} my_time_t Field_timestampf::get_timestamp(const uchar *pos, ulong *sec_part) const { struct timeval tm; - my_timestamp_from_binary(&tm, pos, dec); - *sec_part= tm.tv_usec; + if (sec_part) + { + my_timestamp_from_binary(&tm, pos ? pos : ptr, dec); + *sec_part= tm.tv_usec; + } + else + { + my_timestamp_from_binary(&tm, pos ? 
pos : ptr, 0); + } return tm.tv_sec; } @@ -10307,7 +10390,8 @@ Field *make_field(TABLE_SHARE *share, Field::geometry_type geom_type, uint srid, Field::utype unireg_check, TYPELIB *interval, - const LEX_CSTRING *field_name) + const LEX_CSTRING *field_name, + uint32 flags) { uchar *UNINIT_VAR(bit_ptr); uchar UNINIT_VAR(bit_offset); @@ -10492,11 +10576,22 @@ Field *make_field(TABLE_SHARE *share, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case MYSQL_TYPE_LONGLONG: - return new (mem_root) - Field_longlong(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, - f_is_zerofill(pack_flag) != 0, - f_is_dec(pack_flag) == 0); + if (flags & (VERS_SYS_START_FLAG|VERS_SYS_END_FLAG)) + { + return new (mem_root) + Field_vers_trx_id(ptr, field_length, null_pos, null_bit, + unireg_check, field_name, + f_is_zerofill(pack_flag) != 0, + f_is_dec(pack_flag) == 0); + } + else + { + return new (mem_root) + Field_longlong(ptr,field_length,null_pos,null_bit, + unireg_check, field_name, + f_is_zerofill(pack_flag) != 0, + f_is_dec(pack_flag) == 0); + } case MYSQL_TYPE_TIMESTAMP: { uint dec= field_length > MAX_DATETIME_WIDTH ? @@ -10573,6 +10668,11 @@ Field *make_field(TABLE_SHARE *share, return 0; } +bool Field_vers_trx_id::test_if_equality_guarantees_uniqueness(const Item* item) const +{ + return item->type() == Item::DATE_ITEM; +} + /** Create a field suitable for create of table. */ @@ -10594,6 +10694,8 @@ Column_definition::Column_definition(THD *thd, Field *old_field, option_list= old_field->option_list; pack_flag= 0; compression_method_ptr= 0; + versioning= VERSIONING_NOT_SET; + implicit_not_null= false; if (orig_field) { diff --git a/sql/field.h b/sql/field.h index eb6510bc9a4..b9b9e052ec6 100644 --- a/sql/field.h +++ b/sql/field.h @@ -673,6 +673,14 @@ public: static void operator delete(void *ptr, MEM_ROOT *mem_root) { DBUG_ASSERT(0); } + /** + Used by System Versioning. 
+ */ + virtual void set_max() + { DBUG_ASSERT(0); } + virtual bool is_max() + { DBUG_ASSERT(0); return false; } + uchar *ptr; // Position to field in record /** Byte where the @c NULL bit is stored inside a record. If this Field is a @@ -1000,6 +1008,9 @@ public: } bool set_explicit_default(Item *value); + virtual my_time_t get_timestamp(const uchar *pos= NULL, ulong *sec_part= NULL) const + { DBUG_ASSERT(0); return 0; } + /** Evaluates the @c UPDATE default function, if one exists, and stores the result in the record buffer. If no such function exists for the column, @@ -1439,6 +1450,16 @@ public: FIELD_FLAGS_COLUMN_FORMAT; } + bool vers_sys_field() const + { + return flags & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG); + } + + virtual bool vers_trx_id() const + { + return false; + } + /* Validate a non-null field value stored in the given record according to the current thread settings, e.g. sql_mode. @@ -2146,6 +2167,57 @@ public: { return unpack_int64(to, from, from_end); } + + void set_max(); + bool is_max(); +}; + + +class Field_vers_trx_id :public Field_longlong { + MYSQL_TIME cache; + ulonglong cached; +public: + Field_vers_trx_id(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, bool zero_arg, + bool unsigned_arg) + : Field_longlong(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, + unsigned_arg), + cached(0) + {} + enum_field_types real_type() const { return MYSQL_TYPE_LONGLONG; } + enum_field_types type() const { return MYSQL_TYPE_LONGLONG;} + uint size_of() const { return sizeof(*this); } + bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate, ulonglong trx_id); + bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate) + { + return get_date(ltime, fuzzydate, (ulonglong) val_int()); + } + bool test_if_equality_guarantees_uniqueness(const Item *item) const; + bool can_optimize_keypart_ref(const Item_bool_func *cond, + const 
Item *item) const + { + return true; + } + + bool can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) const + { + return true; + } + bool can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const + { + return true; + } + /* cmp_type() cannot be TIME_RESULT, because we want to compare this field against + integers. But in all other cases we treat it as TIME_RESULT! */ + bool vers_trx_id() const + { + return true; + } }; @@ -2561,8 +2633,10 @@ public: { return memcmp(a_ptr, b_ptr, pack_length()); } + void set_max(); + bool is_max(); void store_TIME(my_time_t timestamp, ulong sec_part); - my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const; + my_time_t get_timestamp(const uchar *pos= NULL, ulong *sec_part= NULL) const; uint size_of() const { return sizeof(*this); } }; @@ -3982,7 +4056,8 @@ Field *make_field(TABLE_SHARE *share, MEM_ROOT *mem_root, CHARSET_INFO *cs, Field::geometry_type geom_type, uint srid, Field::utype unireg_check, - TYPELIB *interval, const LEX_CSTRING *field_name); + TYPELIB *interval, const LEX_CSTRING *field_name, + uint32 flags); /* Create field class for CREATE TABLE @@ -4036,6 +4111,12 @@ class Column_definition: public Sql_alloc, public: LEX_CSTRING field_name; LEX_CSTRING comment; // Comment for field + enum enum_column_versioning + { + VERSIONING_NOT_SET, + WITH_VERSIONING, + WITHOUT_VERSIONING + }; Item *on_update; // ON UPDATE NOW() /* At various stages in execution this can be length of field in bytes or @@ -4068,6 +4149,9 @@ public: *default_value, // Default value *check_constraint; // Check constraint + enum_column_versioning versioning; + bool implicit_not_null; + Column_definition() :Type_handler_hybrid_field_type(&type_handler_null), compression_method_ptr(0), @@ -4077,10 +4161,13 @@ public: interval(0), charset(&my_charset_bin), srid(0), geom_type(Field::GEOM_GEOMETRY), option_list(NULL), pack_flag(0), - vcol_info(0), default_value(0), check_constraint(0) 
+ vcol_info(0), default_value(0), check_constraint(0), + versioning(VERSIONING_NOT_SET), + implicit_not_null(false) { interval_list.empty(); } + Column_definition(THD *thd, Field *field, Field *orig_field); void set_attributes(const Lex_field_type_st &type, CHARSET_INFO *cs); void create_length_to_internal_length_null() @@ -4207,7 +4294,7 @@ public: (uint32)length, null_pos, null_bit, pack_flag, type_handler(), charset, geom_type, srid, unireg_check, interval, - field_name_arg); + field_name_arg, flags); } Field *make_field(TABLE_SHARE *share, MEM_ROOT *mem_root, const LEX_CSTRING *field_name_arg) const diff --git a/sql/gen_lex_token.cc b/sql/gen_lex_token.cc index ebd966d9301..e024ee3799e 100644 --- a/sql/gen_lex_token.cc +++ b/sql/gen_lex_token.cc @@ -130,6 +130,8 @@ void compute_tokens() set_token(WITH_CUBE_SYM, "WITH CUBE"); set_token(WITH_ROLLUP_SYM, "WITH ROLLUP"); + set_token(WITH_SYSTEM_SYM, "WITH SYSTEM"); + set_token(FOR_SYSTEM_TIME_SYM, "FOR SYSTEM_TIME"); set_token(NOT2_SYM, "!"); set_token(OR2_SYM, "|"); set_token(PARAM_MARKER, "?"); diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 67132e5ee4f..a580f793e9a 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -160,9 +160,6 @@ static int partition_initialize(void *p) bool Partition_share::init(uint num_parts) { DBUG_ENTER("Partition_share::init"); - mysql_mutex_init(key_partition_auto_inc_mutex, - &auto_inc_mutex, - MY_MUTEX_INIT_FAST); auto_inc_initialized= false; partition_name_hash_initialized= false; next_auto_inc_val= 0; @@ -1265,12 +1262,12 @@ int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, (modelled after mi_check_print_msg) TODO: move this into the handler, or rewrite mysql_admin_table. */ -static bool print_admin_msg(THD* thd, uint len, +bool print_admin_msg(THD* thd, uint len, const char* msg_type, const char* db_name, String &table_name, const char* op_name, const char *fmt, ...) 
ATTRIBUTE_FORMAT(printf, 7, 8); -static bool print_admin_msg(THD* thd, uint len, +bool print_admin_msg(THD* thd, uint len, const char* msg_type, const char* db_name, String &table_name, const char* op_name, const char *fmt, ...) @@ -3588,7 +3585,7 @@ int ha_partition::open(const char *name, int mode, uint test_if_locked) m_part_info->part_expr->get_monotonicity_info(); else if (m_part_info->list_of_part_fields) m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING; - info(HA_STATUS_VARIABLE | HA_STATUS_CONST); + info(HA_STATUS_OPEN | HA_STATUS_VARIABLE | HA_STATUS_CONST); DBUG_RETURN(0); err_handler: @@ -4321,6 +4318,15 @@ int ha_partition::update_row(const uchar *old_data, const uchar *new_data) if (error) goto exit; + if (m_part_info->part_type == VERSIONING_PARTITION) + { + uint sub_factor= m_part_info->num_subparts ? m_part_info->num_subparts : 1; + DBUG_ASSERT(m_tot_parts == m_part_info->num_parts * sub_factor); + uint lpart_id= new_part_id / sub_factor; + // lpart_id is VERSIONING partition because new_part_id != old_part_id + m_part_info->vers_update_stats(thd, lpart_id); + } + tmp_disable_binlog(thd); /* Do not replicate the low-level changes. 
*/ error= m_file[old_part_id]->ha_delete_row(old_data); reenable_binlog(thd); @@ -5754,6 +5760,22 @@ int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen) } +int ha_partition::index_read_last_map(uchar *buf, + const uchar *key, + key_part_map keypart_map) +{ + DBUG_ENTER("ha_partition::index_read_last_map"); + + m_ordered= true; // Safety measure + end_range= NULL; + m_index_scan_type= partition_index_read_last; + m_start_key.key= key; + m_start_key.keypart_map= keypart_map; + m_start_key.flag= HA_READ_PREFIX_LAST; + DBUG_RETURN(common_index_read(buf, true)); +} + + /* Read next record when performing index scan backwards @@ -6563,6 +6585,7 @@ int ha_partition::info(uint flag) { uint no_lock_flag= flag & HA_STATUS_NO_LOCK; uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA; + uint open_flag= flag & HA_STATUS_OPEN; DBUG_ENTER("ha_partition::info"); #ifndef DBUG_OFF @@ -6603,7 +6626,7 @@ int ha_partition::info(uint flag) do { file= *file_array; - file->info(HA_STATUS_AUTO | no_lock_flag); + file->info(HA_STATUS_AUTO | no_lock_flag | open_flag); set_if_bigger(auto_increment_value, file->stats.auto_increment_value); } while (*(++file_array)); @@ -6657,7 +6680,7 @@ int ha_partition::info(uint flag) i= bitmap_get_next_set(&m_part_info->read_partitions, i)) { file= m_file[i]; - file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag); + file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag | open_flag); stats.records+= file->stats.records; stats.deleted+= file->stats.deleted; stats.data_file_length+= file->stats.data_file_length; @@ -6738,7 +6761,7 @@ int ha_partition::info(uint flag) if (!(flag & HA_STATUS_VARIABLE) || !bitmap_is_set(&(m_part_info->read_partitions), (uint)(file_array - m_file))) - file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag); + file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag | open_flag); if (file->stats.records > max_records) { max_records= file->stats.records; @@ -6757,7 +6780,7 @@ 
int ha_partition::info(uint flag) this); file= m_file[handler_instance]; - file->info(HA_STATUS_CONST | no_lock_flag); + file->info(HA_STATUS_CONST | no_lock_flag | open_flag); stats.block_size= file->stats.block_size; stats.create_time= file->stats.create_time; ref_length= m_ref_length; @@ -6773,7 +6796,7 @@ int ha_partition::info(uint flag) Note: all engines does not support HA_STATUS_ERRKEY, so set errkey. */ file->errkey= errkey; - file->info(HA_STATUS_ERRKEY | no_lock_flag); + file->info(HA_STATUS_ERRKEY | no_lock_flag | open_flag); errkey= file->errkey; } if (flag & HA_STATUS_TIME) @@ -6790,7 +6813,7 @@ int ha_partition::info(uint flag) do { file= *file_array; - file->info(HA_STATUS_TIME | no_lock_flag); + file->info(HA_STATUS_TIME | no_lock_flag | open_flag); if (file->stats.update_time > stats.update_time) stats.update_time= file->stats.update_time; } while (*(++file_array)); diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 0402908c640..981eb0aa74e 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -68,6 +68,8 @@ public: }; +extern PSI_mutex_key key_partition_auto_inc_mutex; + /** Partition specific Handler_share. */ @@ -85,24 +87,86 @@ public: HASH partition_name_hash; /** Storage for each partitions Handler_share */ Parts_share_refs partitions_share_refs; - Partition_share() {} + Partition_share() + : auto_inc_initialized(false), + next_auto_inc_val(0), + partition_name_hash_initialized(false), + partition_names(NULL) + { + mysql_mutex_init(key_partition_auto_inc_mutex, + &auto_inc_mutex, + MY_MUTEX_INIT_FAST); + } + ~Partition_share() { - DBUG_ENTER("Partition_share::~Partition_share"); mysql_mutex_destroy(&auto_inc_mutex); + if (partition_names) + { + my_free(partition_names); + } if (partition_name_hash_initialized) + { my_hash_free(&partition_name_hash); - DBUG_VOID_RETURN; + } } + bool init(uint num_parts); - void lock_auto_inc() + + /** + Release reserved auto increment values not used. + @param thd Thread. 
+ @param table_share Table Share + @param next_insert_id Next insert id (first non used auto inc value). + @param max_reserved End of reserved auto inc range. + */ + void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share, + const ulonglong next_insert_id, + const ulonglong max_reserved); + + /** lock mutex protecting auto increment value next_auto_inc_val. */ + inline void lock_auto_inc() { mysql_mutex_lock(&auto_inc_mutex); } - void unlock_auto_inc() + /** unlock mutex protecting auto increment value next_auto_inc_val. */ + inline void unlock_auto_inc() { mysql_mutex_unlock(&auto_inc_mutex); } + /** + Populate partition_name_hash with partition and subpartition names + from part_info. + @param part_info Partition info containing all partitions metadata. + + @return Operation status. + @retval false Success. + @retval true Failure. + */ + bool populate_partition_name_hash(partition_info *part_info); + /** Get partition name. + + @param part_id Partition id (for subpartitioned table only subpartition + names will be returned.) + + @return partition name or NULL if error. + */ + const char *get_partition_name(size_t part_id) const; +private: + const uchar **partition_names; + /** + Insert [sub]partition name into partition_name_hash + @param name Partition name. + @param part_id Partition id. + @param is_subpart True if subpartition else partition. + + @return Operation status. + @retval false Success. + @retval true Failure. 
+ */ + bool insert_partition_name_in_hash(const char *name, + uint part_id, + bool is_subpart); }; @@ -599,6 +663,10 @@ public: virtual int index_last(uchar * buf); virtual int index_next_same(uchar * buf, const uchar * key, uint keylen); + int index_read_last_map(uchar *buf, + const uchar *key, + key_part_map keypart_map); + /* read_first_row is virtual method but is only implemented by handler.cc, no storage engine has implemented it so neither @@ -1080,7 +1148,6 @@ private: ulonglong nr= (((Field_num*) field)->unsigned_flag || field->val_int() > 0) ? field->val_int() : 0; lock_auto_increment(); - DBUG_ASSERT(part_share->auto_inc_initialized); /* must check when the mutex is taken */ if (nr >= part_share->next_auto_inc_val) part_share->next_auto_inc_val= nr + 1; @@ -1271,7 +1338,37 @@ public: return h; } + virtual ha_rows part_records(void *_part_elem) + { + partition_element *part_elem= reinterpret_cast<partition_element *>(_part_elem); + DBUG_ASSERT(m_part_info); + uint32 sub_factor= m_part_info->num_subparts ? 
m_part_info->num_subparts : 1; + uint32 part_id= part_elem->id * sub_factor; + uint32 part_id_end= part_id + sub_factor; + DBUG_ASSERT(part_id_end <= m_tot_parts); + ha_rows part_recs= 0; + for (; part_id < part_id_end; ++part_id) + { + handler *file= m_file[part_id]; + DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id)); + file->info(HA_STATUS_OPEN | HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + part_recs+= file->stats.records; + } + return part_recs; + } + + virtual handler* part_handler(uint32 part_id) + { + DBUG_ASSERT(part_id < m_tot_parts); + return m_file[part_id]; + } + friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); }; +bool print_admin_msg(THD* thd, uint len, + const char* msg_type, + const char* db_name, String &table_name, + const char* op_name, const char *fmt, ...); + #endif /* HA_PARTITION_INCLUDED */ diff --git a/sql/ha_sequence.cc b/sql/ha_sequence.cc index 93f6f32d473..4afa2168b8d 100644 --- a/sql/ha_sequence.cc +++ b/sql/ha_sequence.cc @@ -259,7 +259,8 @@ int ha_sequence::write_row(uchar *buf) sequence->copy(&tmp_seq); rows_changed++; /* We have to do the logging while we hold the sequence mutex */ - error= binlog_log_row(table, 0, buf, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, 0, buf, log_func); row_already_logged= 1; } diff --git a/sql/handler.cc b/sql/handler.cc index ca4f8634091..68e43119caa 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2435,6 +2435,12 @@ LEX_CSTRING *handler::engine_name() } +void handler::ha_statistic_increment(ulong SSV::*offset) const +{ + (table->in_use->status_var.*offset)++; +} + + double handler::keyread_time(uint index, uint ranges, ha_rows rows) { /* @@ -3023,6 +3029,36 @@ int handler::update_auto_increment() enum enum_check_fields save_count_cuted_fields; DBUG_ENTER("handler::update_auto_increment"); + // System Versioning: handle ALTER ADD COLUMN AUTO_INCREMENT + if (thd->lex->sql_command == SQLCOM_ALTER_TABLE && 
table->versioned_by_sql()) + { + Field *end= table->vers_end_field(); + DBUG_ASSERT(end); + bitmap_set_bit(table->read_set, end->field_index); + if (!end->is_max()) + { + uchar *ptr= table->next_number_field->ptr; + switch (table->next_number_field->pack_length()) + { + case 8: + int8store(ptr, vers_auto_decrement--); + break; + case 4: + int4store(ptr, vers_auto_decrement--); + break; + case 2: + int2store(ptr, vers_auto_decrement--); + break; + case 1: + *ptr= vers_auto_decrement--; + break; + default: + DBUG_ASSERT(false); + } + DBUG_RETURN(0); + } + } + /* next_insert_id is a "cursor" into the reserved interval, it may go greater than the interval, but not smaller. @@ -3145,7 +3181,7 @@ int handler::update_auto_increment() /* Store field without warning (Warning will be printed by insert) */ save_count_cuted_fields= thd->count_cuted_fields; thd->count_cuted_fields= CHECK_FIELD_IGNORE; - tmp= table->next_number_field->store((longlong) nr, TRUE); + tmp= table->next_number_field->store((longlong)nr, TRUE); thd->count_cuted_fields= save_count_cuted_fields; if (unlikely(tmp)) // Out of range value in store @@ -5681,8 +5717,10 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat) 1 Row needs to be logged */ -inline bool handler::check_table_binlog_row_based(bool binlog_row) +bool handler::check_table_binlog_row_based(bool binlog_row) { + if (table->versioned_by_engine()) + return false; if (unlikely((table->in_use->variables.sql_log_bin_off))) return 0; /* Called by partitioning engine */ if (unlikely((!check_table_binlog_row_based_done))) @@ -5831,10 +5869,10 @@ static int write_locked_table_maps(THD *thd) static int check_wsrep_max_ws_rows(); -static int binlog_log_row_internal(TABLE* table, - const uchar *before_record, - const uchar *after_record, - Log_func *log_func) +int binlog_log_row(TABLE* table, + const uchar *before_record, + const uchar *after_record, + Log_func *log_func) { bool error= 0; THD *const thd= table->in_use; @@ 
-5869,16 +5907,6 @@ static int binlog_log_row_internal(TABLE* table, return error ? HA_ERR_RBR_LOGGING_FAILED : 0; } -int binlog_log_row(TABLE* table, - const uchar *before_record, - const uchar *after_record, - Log_func *log_func) -{ - if (!table->file->check_table_binlog_row_based(1)) - return 0; - return binlog_log_row_internal(table, before_record, after_record, log_func); -} - int handler::ha_external_lock(THD *thd, int lock_type) { @@ -6027,7 +6055,8 @@ int handler::ha_write_row(uchar *buf) if (likely(!error) && !row_already_logged) { rows_changed++; - error= binlog_log_row(table, 0, buf, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, 0, buf, log_func); } DEBUG_SYNC_C("ha_write_row_end"); DBUG_RETURN(error); @@ -6059,7 +6088,8 @@ int handler::ha_update_row(const uchar *old_data, const uchar *new_data) if (likely(!error) && !row_already_logged) { rows_changed++; - error= binlog_log_row(table, old_data, new_data, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, old_data, new_data, log_func); } return error; } @@ -6114,7 +6144,8 @@ int handler::ha_delete_row(const uchar *buf) if (likely(!error)) { rows_changed++; - error= binlog_log_row(table, buf, 0, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, buf, 0, log_func); } return error; } @@ -6594,3 +6625,498 @@ int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info) mysql_mutex_unlock(&LOCK_global_index_stats); DBUG_RETURN(res); } + +bool Vers_parse_info::is_trx_start(const char *name) const +{ + DBUG_ASSERT(name); + return as_row.start && as_row.start == LString_i(name); +} +bool Vers_parse_info::is_trx_end(const char *name) const +{ + DBUG_ASSERT(name); + return as_row.end && as_row.end == LString_i(name); +} +bool Vers_parse_info::is_trx_start(const Create_field &f) const +{ + return f.flags & VERS_SYS_START_FLAG; +} +bool Vers_parse_info::is_trx_end(const 
Create_field &f) const +{ + return f.flags & VERS_SYS_END_FLAG; +} + +static Create_field *vers_init_sys_field(THD *thd, const char *field_name, + int flags, bool integer_fields) +{ + Create_field *f= new (thd->mem_root) Create_field(); + if (!f) + return NULL; + + memset(f, 0, sizeof(*f)); + f->field_name.str= field_name; + f->field_name.length= strlen(field_name); + f->charset= system_charset_info; + f->flags= flags | HIDDEN_FLAG; + if (integer_fields) + { + f->set_handler(&type_handler_longlong); + f->flags|= UNSIGNED_FLAG; + f->length= MY_INT64_NUM_DECIMAL_DIGITS - 1; + } + else + { + f->set_handler(&type_handler_timestamp2); + f->length= MAX_DATETIME_PRECISION; + } + + if (f->check(thd)) + return NULL; + + return f; +} + +static bool vers_create_sys_field(THD *thd, const char *field_name, + Alter_info *alter_info, int flags, + bool integer_fields) +{ + Create_field *f= vers_init_sys_field(thd, field_name, flags, integer_fields); + if (!f) + return true; + + alter_info->flags|= Alter_info::ALTER_ADD_COLUMN; + alter_info->create_list.push_back(f); + + return false; +} + +static bool vers_change_sys_field(THD *thd, const char *field_name, + Alter_info *alter_info, int flags, + bool integer_fields, const char *change) +{ + Create_field *f= vers_init_sys_field(thd, field_name, flags, integer_fields); + if (!f) + return true; + + f->change.str= change; + f->change.length= strlen(change); + + alter_info->flags|= Alter_info::ALTER_CHANGE_COLUMN; + alter_info->create_list.push_back(f); + + return false; +} + +bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info, + bool integer_fields) +{ + // If user specified some of these he must specify the others too. Do nothing. 
+ if (as_row.start || as_row.end || system_time.start || system_time.end) + return false; + + alter_info->flags|= Alter_info::ALTER_ADD_COLUMN; + + static const LString sys_trx_start= "sys_trx_start"; + static const LString sys_trx_end= "sys_trx_end"; + + system_time= start_end_t(sys_trx_start, sys_trx_end); + as_row= system_time; + + return vers_create_sys_field(thd, sys_trx_start, alter_info, + VERS_SYS_START_FLAG, + integer_fields) || + vers_create_sys_field(thd, sys_trx_end, alter_info, + VERS_SYS_END_FLAG, + integer_fields); +} + +bool Vers_parse_info::check_and_fix_implicit( + THD *thd, + Alter_info *alter_info, + HA_CREATE_INFO *create_info, + const char* table_name) +{ + SELECT_LEX &slex= thd->lex->select_lex; + int vers_tables= 0; + bool from_select= slex.item_list.elements ? true : false; + + if (from_select) + { + for (TABLE_LIST *table= slex.table_list.first; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + vers_tables++; + } + } + + // CREATE ... SELECT: if at least one table in SELECT is versioned, + // then created table will be versioned. + if (thd->variables.vers_force) + { + with_system_versioning= true; + create_info->options|= HA_VERSIONED_TABLE; + } + + // Possibly override default storage engine to match one used in source table. + if (from_select && with_system_versioning && + !(create_info->used_fields & HA_CREATE_USED_ENGINE)) + { + List_iterator_fast<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (is_trx_start(*f) || is_trx_end(*f)) + { + create_info->db_type= f->field->orig_table->file->ht; + break; + } + } + } + + if (!need_check()) + return false; + + if (!versioned_fields && unversioned_fields && !with_system_versioning) + { + // All is correct but this table is not versioned. 
+ create_info->options&= ~HA_VERSIONED_TABLE; + return false; + } + + if (without_system_versioning) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_NOT_ALLOWED, MYF(0), table_name, + "WITHOUT SYSTEM VERSIONING"); + return true; + } + + if ((system_time.start || system_time.end || as_row.start || as_row.end) && + !with_system_versioning) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_MISSING, MYF(0), table_name, + "WITH SYSTEM VERSIONING"); + return true; + } + + TABLE *orig_table= NULL; + List_iterator<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (is_trx_start(*f)) + { + if (!as_row.start) // not inited in CREATE ... SELECT + { + DBUG_ASSERT(vers_tables > 0); + if (orig_table && orig_table != f->field->orig_table) + { + err_different_tables: + my_error_as(ER_VERS_WRONG_PARAMS, ER_VERS_DIFFERENT_TABLES, MYF(0), table_name); + return true; + } + orig_table= f->field->orig_table; + as_row.start= f->field_name; + system_time.start= as_row.start; + } + continue; + } + if (is_trx_end(*f)) + { + if (!as_row.end) + { + DBUG_ASSERT(vers_tables > 0); + if (orig_table && orig_table != f->field->orig_table) + { + goto err_different_tables; + } + orig_table= f->field->orig_table; + as_row.end= f->field_name; + system_time.end= as_row.end; + } + continue; + } + + if ((f->versioning == Column_definition::VERSIONING_NOT_SET && + !with_system_versioning) || + f->versioning == Column_definition::WITHOUT_VERSIONING) + { + f->flags|= VERS_OPTIMIZED_UPDATE_FLAG; + } + } + + bool integer_fields= create_info->db_type->flags & HTON_NATIVE_SYS_VERSIONING; + + if (fix_implicit(thd, alter_info, integer_fields)) + return true; + + int plain_cols= 0; // column doesn't have WITH or WITHOUT SYSTEM VERSIONING + int vers_cols= 0; // column has WITH SYSTEM VERSIONING + it.rewind(); + while (const Create_field *f= it++) + { + if (is_trx_start(*f) || is_trx_end(*f)) + continue; + + if (f->versioning == Column_definition::VERSIONING_NOT_SET) + plain_cols++; + else if 
(f->versioning == Column_definition::WITH_VERSIONING) + vers_cols++; + } + + bool table_with_system_versioning= + as_row.start || as_row.end || system_time.start || system_time.end; + + if (!thd->lex->tmp_table() && + // CREATE from SELECT (Create_fields are not yet added) + !from_select && + vers_cols == 0 && + (plain_cols == 0 || !table_with_system_versioning)) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_VERS_NO_COLS_DEFINED, MYF(0), + table_name, "WITH SYSTEM VERSIONING"); + return true; + } + + return check_with_conditions(table_name) || + check_generated_type(table_name, alter_info, integer_fields); +} + +static bool add_field_to_drop_list(THD *thd, Alter_info *alter_info, + Field *field) +{ + DBUG_ASSERT(field); + DBUG_ASSERT(field->field_name.str); + alter_info->flags|= Alter_info::ALTER_DROP_COLUMN; + Alter_drop *ad= new (thd->mem_root) + Alter_drop(Alter_drop::COLUMN, field->field_name.str, false); + return !ad || alter_info->drop_list.push_back(ad, thd->mem_root); +} + +bool Vers_parse_info::check_and_fix_alter(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, + TABLE_SHARE *share) +{ + bool integer_fields= + create_info->db_type->flags & HTON_NATIVE_SYS_VERSIONING; + const char *table_name= share->table_name.str; + + if (!need_check() && !share->versioned) + return false; + + if (without_system_versioning) + { + if (!share->versioned) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_VERS_NOT_VERSIONED, MYF(0), table_name); + return true; + } + + if (!(share->vers_start_field()->flags & HIDDEN_FLAG)) + { + my_error(ER_VERS_SYS_FIELD_NOT_HIDDEN, MYF(0), + share->vers_start_field()->field_name); + return true; + } + if (!(share->vers_end_field()->flags & HIDDEN_FLAG)) + { + my_error(ER_VERS_SYS_FIELD_NOT_HIDDEN, MYF(0), + share->vers_end_field()->field_name); + return true; + } + + if (add_field_to_drop_list(thd, alter_info, share->vers_start_field()) || + add_field_to_drop_list(thd, alter_info, share->vers_end_field())) + return true; + + return 
false; + } + + if ((versioned_fields || unversioned_fields) && !share->versioned) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_VERS_NOT_VERSIONED, MYF(0), table_name); + return true; + } + + if (share->versioned) + { + // copy info from existing table + create_info->options|= HA_VERSIONED_TABLE; + + DBUG_ASSERT(share->vers_start_field() && share->vers_end_field()); + LString start(share->vers_start_field()->field_name); + LString end(share->vers_end_field()->field_name); + DBUG_ASSERT(start.ptr() && end.ptr()); + + as_row= start_end_t(start, end); + system_time= as_row; + + if (alter_info->create_list.elements) + { + List_iterator_fast<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (f->versioning == Column_definition::WITHOUT_VERSIONING) + f->flags|= VERS_OPTIMIZED_UPDATE_FLAG; + + if (f->change.str && (start == f->change || end == f->change)) + { + my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->change); + return true; + } + } + } + + if (alter_info->drop_list.elements) + { + bool done_start= false; + bool done_end= false; + List_iterator<Alter_drop> it(alter_info->drop_list); + while (Alter_drop *d= it++) + { + const char *name= d->name; + Field *f= NULL; + if (!done_start && is_trx_start(name)) + { + f= share->vers_start_field(); + done_start= true; + } + else if (!done_end && is_trx_end(name)) + { + f= share->vers_end_field(); + done_end= true; + } + else + continue; + if (f->flags & HIDDEN_FLAG) + { + my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0), d->type_name(), name); + return true; + } + + if (vers_change_sys_field(thd, name, alter_info, + f->flags & + (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG), + integer_fields, name)) + { + return true; + } + + it.remove(); + + if (done_start && done_end) + break; + } + } + + return false; + } + + return fix_implicit(thd, alter_info, integer_fields) || + (with_system_versioning && + (check_with_conditions(table_name) || + check_generated_type(table_name, alter_info, integer_fields))); +} + 
+bool Vers_parse_info::fix_create_like(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, TABLE_LIST *table) +{ + List_iterator<Create_field> it(alter_info->create_list); + Create_field *f, *f_start=NULL, *f_end= NULL; + + DBUG_ASSERT(alter_info->create_list.elements > 2); + while ((f= it++)) + { + if (f->flags & VERS_SYS_START_FLAG) + { + f_start= f; + if (f_end) + break; + } + else if (f->flags & VERS_SYS_END_FLAG) + { + f_end= f; + if (f_start) + break; + } + } + + if (!f_start || !f_end) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_MISSING, MYF(0), table->table_name, + f_start ? "AS ROW END" : "AS ROW START"); + return true; + } + + as_row= start_end_t(f_start->field_name, f_end->field_name); + system_time= as_row; + + create_info->options|= HA_VERSIONED_TABLE; + return false; +} + + +bool Vers_parse_info::check_with_conditions(const char *table_name) const +{ + if (!as_row.start || !as_row.end) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_MISSING, MYF(0), table_name, + as_row.start ? 
"AS ROW END" : "AS ROW START"); + return true; + } + + if (!system_time.start || !system_time.end) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_MISSING, MYF(0), table_name, + "PERIOD FOR SYSTEM_TIME"); + return true; + } + + if (as_row.start != system_time.start) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_MISMATCH, MYF(0), table_name, + "PERIOD FOR SYSTEM_TIME", "AS ROW START"); + return true; + } + + if (as_row.end != system_time.end) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_MISMATCH, MYF(0), table_name, + "PERIOD FOR SYSTEM_TIME", "AS ROW END"); + return true; + } + + return false; +} + +bool Vers_parse_info::check_generated_type(const char *table_name, + Alter_info *alter_info, + bool integer_fields) const +{ + List_iterator<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (is_trx_start(*f) || is_trx_end(*f)) + { + if (integer_fields) + { + if (f->type_handler() != &type_handler_longlong || !(f->flags & UNSIGNED_FLAG) || + f->length != (MY_INT64_NUM_DECIMAL_DIGITS - 1)) + { + my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), f->field_name.str, + "BIGINT(20) UNSIGNED", table_name); + return true; + } + } + else + { + if (!(f->type_handler() == &type_handler_datetime2 || + f->type_handler() == &type_handler_timestamp2) || + f->length != MAX_DATETIME_FULL_WIDTH) + { + my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), f->field_name.str, + "TIMESTAMP(6)", table_name); + return true; + } + } + } + } + + return false; +} diff --git a/sql/handler.h b/sql/handler.h index f681040db39..66fa336f675 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -34,6 +34,8 @@ #include "structs.h" /* SHOW_COMP_OPTION */ #include "sql_array.h" /* Dynamic_array<> */ #include "mdl.h" +#include "vtq.h" +#include "vers_string.h" #include "sql_analyze_stmt.h" // for Exec_time_tracker @@ -400,6 +402,8 @@ enum enum_alter_inplace_result { #define HA_LEX_CREATE_TMP_TABLE 1U #define HA_CREATE_TMP_ALTER 8U #define HA_LEX_CREATE_SEQUENCE 16U +#define HA_VERSIONED_TABLE 32U 
+#define HA_VTMD 64U #define HA_MAX_REC_LENGTH 65535 @@ -1382,6 +1386,40 @@ struct handlerton */ int (*discover_table_structure)(handlerton *hton, THD* thd, TABLE_SHARE *share, HA_CREATE_INFO *info); + + /* + System Versioning + */ + /** + Query VTQ by TRX_ID. + @param[in] thd MySQL thread + @param[out] out field value or whole record returned by query (selected by `field`) + @param[in] in_trx_id query parameter TRX_ID + @param[in] field field to get in `out` or VTQ_ALL for whole record (vtq_record_t) + @return TRUE if record is found, FALSE otherwise */ + bool (*vers_query_trx_id)(THD* thd, void *out, ulonglong trx_id, vtq_field_t field); + + /** Query VTQ by COMMIT_TS. + @param[in] thd MySQL thread + @param[out] out field value or whole record returned by query (selected by `field`) + @param[in] commit_ts query parameter COMMIT_TS + @param[in] field field to get in `out` or VTQ_ALL for whole record (vtq_record_t) + @param[in] backwards direction of VTQ search + @return TRUE if record is found, FALSE otherwise */ + bool (*vers_query_commit_ts)(THD* thd, void *out, const MYSQL_TIME &commit_ts, + vtq_field_t field, bool backwards); + + /** Check if transaction TX1 sees transaction TX0. + @param[in] thd MySQL thread + @param[out] result true if TX1 sees TX0 + @param[in] trx_id1 TX1 TRX_ID + @param[in] trx_id0 TX0 TRX_ID + @param[in] commit_id1 TX1 COMMIT_ID + @param[in] iso_level1 TX1 isolation level + @param[in] commit_id0 TX0 COMMIT_ID + @return FALSE if there is no trx_id1 in VTQ, otherwise TRUE */ + bool (*vers_trx_sees)(THD *thd, bool &result, ulonglong trx_id1, ulonglong trx_id0, + ulonglong commit_id1, uchar iso_level1, ulonglong commit_id0); }; @@ -1429,6 +1467,7 @@ handlerton *ha_default_tmp_handlerton(THD *thd); */ #define HTON_NO_BINLOG_ROW_OPT (1 << 9) #define HTON_SUPPORTS_EXTENDED_KEYS (1 <<10) //supports extended keys +#define HTON_NATIVE_SYS_VERSIONING (1 << 11) //Engine supports System Versioning // MySQL compatibility. Unused. 
#define HTON_SUPPORTS_FOREIGN_KEYS (1 << 0) //Foreign key constraint supported. @@ -1674,6 +1713,86 @@ struct Schema_specification_st } }; +class Create_field; + +struct Vers_parse_info +{ + Vers_parse_info() : + with_system_versioning(false), + without_system_versioning(false), + versioned_fields(false), + unversioned_fields(false) + {} + + struct start_end_t + { + start_end_t() + {} + start_end_t(LEX_CSTRING _start, LEX_CSTRING _end) : + start(_start), + end(_end) {} + LString_i start; + LString_i end; + }; + + start_end_t system_time; + start_end_t as_row; + + void set_period_for_system_time(LString start, LString end) + { + system_time.start= start; + system_time.end= end; + } + +private: + bool is_trx_start(const char *name) const; + bool is_trx_end(const char *name) const; + bool is_trx_start(const Create_field &f) const; + bool is_trx_end(const Create_field &f) const; + bool fix_implicit(THD *thd, Alter_info *alter_info, bool integer_fields); + bool need_check() const + { + return + versioned_fields || + unversioned_fields || + with_system_versioning || + without_system_versioning || + system_time.start || + system_time.end || + as_row.start || + as_row.end; + } + bool check_with_conditions(const char *table_name) const; + bool check_generated_type(const char *table_name, Alter_info *alter_info, + bool integer_fields) const; + +public: + bool check_and_fix_implicit(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, + const char *table_name); + bool check_and_fix_alter(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, TABLE_SHARE *share); + bool fix_create_like(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, TABLE_LIST *table); + + /** User has added 'WITH SYSTEM VERSIONING' to table definition */ + bool with_system_versioning : 1; + + /** Use has added 'WITHOUT SYSTEM VERSIONING' to ALTER TABLE */ + bool without_system_versioning : 1; + + /** + At least one field was specified 'WITH SYSTEM VERSIONING'. 
Useful for + error handling. + */ + bool versioned_fields : 1; + + /** + At least one field was specified 'WITHOUT SYSTEM VERSIONING'. Useful for + error handling. + */ + bool unversioned_fields : 1; +}; /** A helper struct for table DDL statements, e.g.: @@ -1749,6 +1868,8 @@ struct Table_scope_and_contents_source_st bool table_was_deleted; sequence_definition *seq_create_info; + Vers_parse_info vers_info; + void init() { bzero(this, sizeof(*this)); @@ -1759,6 +1880,16 @@ struct Table_scope_and_contents_source_st db_type= tmp_table() ? ha_default_tmp_handlerton(thd) : ha_default_handlerton(thd); } + + bool versioned() const + { + return options & HA_VERSIONED_TABLE; + } + + bool vtmd() const + { + return options & HA_VTMD; + } }; @@ -2025,6 +2156,8 @@ public: static const HA_ALTER_FLAGS ALTER_DROP_CHECK_CONSTRAINT= 1ULL << 40; + static const HA_ALTER_FLAGS ALTER_DROP_HISTORICAL = 1ULL << 41; + /** Create options (like MAX_ROWS) for the new version of table. @@ -2757,6 +2890,8 @@ public: */ uint auto_inc_intervals_count; + ulonglong vers_auto_decrement; + /** Instrumented table associated with this handler. This member should be set to NULL when no instrumentation is in place, @@ -3191,6 +3326,18 @@ protected: virtual int index_last(uchar * buf) { return HA_ERR_WRONG_COMMAND; } virtual int index_next_same(uchar *buf, const uchar *key, uint keylen); + /** + @brief + The following functions works like index_read, but it find the last + row with the current key value or prefix. + @returns @see index_read_map(). 
+ */ + virtual int index_read_last_map(uchar * buf, const uchar * key, + key_part_map keypart_map) + { + uint key_len= calculate_key_len(table, active_index, key, keypart_map); + return index_read_last(buf, key, key_len); + } virtual int close(void)=0; inline void update_rows_read() { @@ -3270,7 +3417,7 @@ public: void ft_end() { ft_handler=NULL; } virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key) { return NULL; } -private: +public: virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; } virtual int rnd_next(uchar *buf)=0; virtual int rnd_pos(uchar * buf, uchar *pos)=0; @@ -3977,6 +4124,7 @@ public: TABLE_SHARE* get_table_share() { return table_share; } protected: /* Service methods for use by storage engines. */ + void ha_statistic_increment(ulong SSV::*offset) const; void **ha_data(THD *) const; THD *ha_thd(void) const; @@ -4001,8 +4149,8 @@ protected: virtual int delete_table(const char *name); public: - inline bool check_table_binlog_row_based(bool binlog_row); -private: + bool check_table_binlog_row_based(bool binlog_row); + /* Cache result to avoid extra calls */ inline void mark_trx_read_write() { @@ -4012,6 +4160,8 @@ private: mark_trx_read_write_internal(); } } + +private: void mark_trx_read_write_internal(); bool check_table_binlog_row_based_internal(bool binlog_row); @@ -4130,6 +4280,11 @@ protected: virtual int index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag) { return HA_ERR_WRONG_COMMAND; } + virtual int index_read_last(uchar * buf, const uchar * key, uint key_len) + { + my_errno= HA_ERR_WRONG_COMMAND; + return HA_ERR_WRONG_COMMAND; + } friend class ha_partition; friend class ha_sequence; public: @@ -4253,6 +4408,15 @@ public: */ virtual int find_unique_row(uchar *record, uint unique_ref) { return -1; /*unsupported */} + + bool native_versioned() const + { DBUG_ASSERT(ht); return partition_ht()->flags & HTON_NATIVE_SYS_VERSIONING; } + virtual ha_rows part_records(void *_part_elem) + { 
DBUG_ASSERT(0); return false; } + virtual handler* part_handler(uint32 part_id) + { DBUG_ASSERT(0); return NULL; } + virtual void update_partition(uint part_id) + {} protected: Handler_share *get_ha_share_ptr(); void set_ha_share_ptr(Handler_share *arg_ha_share); diff --git a/sql/item.cc b/sql/item.cc index 024fbd87d79..a0a06975ea7 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -5787,6 +5787,7 @@ bool Item_field::fix_fields(THD *thd, Item **reference) expression to 'reference', i.e. it substitute that expression instead of this Item_field */ + DBUG_ASSERT(context); if ((from_field= find_field_in_tables(thd, this, context->first_name_resolution_table, context->last_name_resolution_table, @@ -6641,7 +6642,7 @@ int Item_int::save_in_field(Field *field, bool no_conversions) Item *Item_int::clone_item(THD *thd) { - return new (thd->mem_root) Item_int(thd, name.str, value, max_length); + return new (thd->mem_root) Item_int(thd, name.str, value, max_length, unsigned_flag); } @@ -6971,6 +6972,26 @@ bool Item_temporal_literal::eq(const Item *item, bool binary_cmp) const &((Item_temporal_literal *) item)->cached_time); } +bool Item_temporal_literal::operator<(const MYSQL_TIME <ime) const +{ + if (my_time_compare(&cached_time, <ime) < 0) + return true; + return false; +} + +bool Item_temporal_literal::operator>(const MYSQL_TIME <ime) const +{ + if (my_time_compare(&cached_time, <ime) > 0) + return true; + return false; +} + +bool Item_temporal_literal::operator==(const MYSQL_TIME <ime) const +{ + if (my_time_compare(&cached_time, <ime) == 0) + return true; + return false; +} void Item_date_literal::print(String *str, enum_query_type query_type) { @@ -10329,6 +10350,35 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) return find_matching_grouping_field(this, sel) != NULL; } +Item *Item_field::vers_optimized_fields_transformer(THD *thd, uchar *) +{ + if (!field) + return this; + + if (field->flags & VERS_OPTIMIZED_UPDATE_FLAG && context && + 
field->table->pos_in_table_list && + field->table->pos_in_table_list->vers_conditions) + { + push_warning_printf( + current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_NON_VERSIONED_FIELD_IN_VERSIONED_QUERY, + ER_THD(current_thd, ER_NON_VERSIONED_FIELD_IN_VERSIONED_QUERY), + field_name); + + Item *null_item= new (thd->mem_root) Item_null(thd); + if (null_item) + return null_item; + } + + return this; +} + +bool Item_field::vers_trx_id() const +{ + DBUG_ASSERT(field); + return field->vers_trx_id(); +} + void Item::register_in(THD *thd) { next= thd->free_list; diff --git a/sql/item.h b/sql/item.h index 1f743913d78..6184417136a 100644 --- a/sql/item.h +++ b/sql/item.h @@ -541,7 +541,6 @@ public: String_copier_for_item(THD *thd): m_thd(thd) { } }; - class Item: public Value_source, public Type_all_attributes { @@ -775,6 +774,10 @@ public: return type_handler()->field_type(); } virtual const Type_handler *type_handler() const= 0; + virtual uint field_flags() const + { + return 0; + } const Type_handler *type_handler_for_comparison() const { return type_handler()->type_handler_for_comparison(); @@ -1743,6 +1746,10 @@ public: virtual Item_field *field_for_view_update() { return 0; } + virtual Item *vers_optimized_fields_transformer(THD *thd, uchar *) + { return this; } + virtual bool vers_trx_id() const + { return false; } virtual Item *neg_transformer(THD *thd) { return NULL; } virtual Item *update_value_transformer(THD *thd, uchar *select_arg) { return this; } @@ -1933,8 +1940,10 @@ public: { marker &= ~EXTRACTION_MASK; } -}; + /* System versioning */ + virtual vtq_record_t *vtq_cached_result() { return NULL; } +}; template <class T> inline Item* get_item_copy (THD *thd, MEM_ROOT *mem_root, T* item) @@ -2783,6 +2792,10 @@ public: return field->type_handler(); } TYPELIB *get_typelib() const { return field->get_typelib(); } + uint32 field_flags() const + { + return field->flags; + } enum_monotonicity_info get_monotonicity_info() const { return MONOTONIC_STRICT_INCREASING; 
@@ -2879,6 +2892,8 @@ public: uint32 max_display_length() const { return field->max_display_length(); } Item_field *field_for_view_update() { return this; } int fix_outer_field(THD *thd, Field **field, Item **reference); + virtual Item *vers_optimized_fields_transformer(THD *thd, uchar *); + virtual bool vers_trx_id() const; virtual Item *update_value_transformer(THD *thd, uchar *select_arg); Item *derived_field_transformer_for_having(THD *thd, uchar *arg); Item *derived_field_transformer_for_where(THD *thd, uchar *arg); @@ -3320,6 +3335,14 @@ public: name.str= str_arg; name.length= safe_strlen(name.str); fixed= 1; } + Item_int(THD *thd, const char *str_arg,longlong i,uint length, bool flag): + Item_num(thd), value(i) + { + max_length=length; + name.str= str_arg; name.length= safe_strlen(name.str); + fixed= 1; + unsigned_flag= flag; + } Item_int(THD *thd, const char *str_arg, uint length=64); enum Type type() const { return INT_ITEM; } const Type_handler *type_handler() const @@ -3770,10 +3793,10 @@ class Item_return_date_time :public Item_partition_func_safe_string enum_field_types date_time_field_type; public: Item_return_date_time(THD *thd, const char *name_arg, uint length_arg, - enum_field_types field_type_arg): + enum_field_types field_type_arg, uint dec_arg= 0): Item_partition_func_safe_string(thd, name_arg, length_arg, &my_charset_bin), date_time_field_type(field_type_arg) - { decimals= 0; } + { decimals= dec_arg; } const Type_handler *type_handler() const { return Type_handler::get_handler_by_field_type(date_time_field_type); @@ -4014,6 +4037,13 @@ public: { return val_decimal_from_date(decimal_value); } int save_in_field(Field *field, bool no_conversions) { return save_date_in_field(field, no_conversions); } + void set_time(MYSQL_TIME *ltime) + { + cached_time= *ltime; + } + bool operator>(const MYSQL_TIME <ime) const; + bool operator<(const MYSQL_TIME <ime) const; + bool operator==(const MYSQL_TIME <ime) const; }; @@ -4073,7 +4103,7 @@ public: class 
Item_datetime_literal: public Item_temporal_literal { public: - Item_datetime_literal(THD *thd, MYSQL_TIME *ltime, uint dec_arg): + Item_datetime_literal(THD *thd, MYSQL_TIME *ltime, uint dec_arg= 0): Item_temporal_literal(thd, ltime, dec_arg) { max_length= MAX_DATETIME_WIDTH + (decimals ? decimals + 1 : 0); @@ -5056,6 +5086,7 @@ public: #include "item_xmlfunc.h" #include "item_jsonfunc.h" #include "item_create.h" +#include "item_vers.h" #endif /** @@ -6003,6 +6034,12 @@ public: { DBUG_ASSERT(item->fixed); maybe_null= item->maybe_null; + if (item->real_type() == Item::FIELD_ITEM) + { + Item_field *item_field= (Item_field *)item->real_item(); + flags|= (item_field->field->flags & + (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG)); + } } Item_type_holder(THD *thd, const LEX_CSTRING *name_arg, @@ -6050,6 +6087,12 @@ public: Type_geometry_attributes::set_geometry_type(type); } Item* get_copy(THD *thd, MEM_ROOT *mem_root) { return 0; } + + uint flags; + uint32 field_flags() const + { + return flags; + } }; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index f35017629aa..85bee75fe79 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -127,9 +127,12 @@ Type_handler_hybrid_field_type::aggregate_for_comparison(const char *funcname, many cases. 
*/ set_handler(items[0]->type_handler()->type_handler_for_comparison()); + m_vers_trx_id= items[0]->vers_trx_id(); for (uint i= 1 ; i < nitems ; i++) { unsigned_count+= items[i]->unsigned_flag; + if (!m_vers_trx_id) + m_vers_trx_id= items[i]->vers_trx_id(); if (aggregate_for_comparison(items[i]->type_handler()-> type_handler_for_comparison())) { @@ -421,7 +424,7 @@ void Item_func::convert_const_compared_to_int_field(THD *thd) args[field= 1]->real_item()->type() == FIELD_ITEM) { Item_field *field_item= (Item_field*) (args[field]->real_item()); - if ((field_item->field_type() == MYSQL_TYPE_LONGLONG || + if (((field_item->field_type() == MYSQL_TYPE_LONGLONG && !field_item->vers_trx_id()) || field_item->field_type() == MYSQL_TYPE_YEAR)) convert_const_to_int(thd, field_item, &args[!field]); } @@ -5265,7 +5268,6 @@ bool fix_escape_item(THD *thd, Item *escape_item, String *tmp_str, return FALSE; } - bool Item_func_like::fix_fields(THD *thd, Item **ref) { DBUG_ASSERT(fixed == 0); diff --git a/sql/item_create.cc b/sql/item_create.cc index 5d6d9742c7a..d73a5327d25 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -43,38 +43,6 @@ */ /** - Adapter for native functions with a variable number of arguments. - The main use of this class is to discard the following calls: - <code>foo(expr1 AS name1, expr2 AS name2, ...)</code> - which are syntactically correct (the syntax can refer to a UDF), - but semantically invalid for native functions. -*/ - -class Create_native_func : public Create_func -{ -public: - virtual Item *create_func(THD *thd, LEX_CSTRING *name, - List<Item> *item_list); - - /** - Builder method, with no arguments. - @param thd The current thread - @param name The native function name - @param item_list The function parameters, none of which are named - @return An item representing the function call - */ - virtual Item *create_native(THD *thd, LEX_CSTRING *name, - List<Item> *item_list) = 0; - -protected: - /** Constructor. 
*/ - Create_native_func() {} - /** Destructor. */ - virtual ~Create_native_func() {} -}; - - -/** Adapter for functions that takes exactly zero arguments. */ @@ -6827,12 +6795,6 @@ Create_func_year_week::create_native(THD *thd, LEX_CSTRING *name, } -struct Native_func_registry -{ - LEX_CSTRING name; - Create_func *builder; -}; - #define BUILDER(F) & F::s_singleton #ifdef HAVE_SPATIAL @@ -7218,8 +7180,6 @@ get_native_fct_hash_key(const uchar *buff, size_t *length, int item_create_init() { - Native_func_registry *func; - DBUG_ENTER("item_create_init"); if (my_hash_init(& native_functions_hash, @@ -7232,7 +7192,16 @@ int item_create_init() MYF(0))) DBUG_RETURN(1); - for (func= func_array; func->builder != NULL; func++) + DBUG_RETURN(item_create_append(func_array)); +} + +int item_create_append(Native_func_registry array[]) +{ + Native_func_registry *func; + + DBUG_ENTER("item_create_append"); + + for (func= array; func->builder != NULL; func++) { if (my_hash_insert(& native_functions_hash, (uchar*) func)) DBUG_RETURN(1); diff --git a/sql/item_create.h b/sql/item_create.h index 128a19a1c15..e0beca37082 100644 --- a/sql/item_create.h +++ b/sql/item_create.h @@ -19,6 +19,8 @@ #ifndef ITEM_CREATE_H #define ITEM_CREATE_H +#include "item_func.h" // Cast_target + typedef struct st_udf_func udf_func; /** @@ -67,6 +69,38 @@ protected: /** + Adapter for native functions with a variable number of arguments. + The main use of this class is to discard the following calls: + <code>foo(expr1 AS name1, expr2 AS name2, ...)</code> + which are syntactically correct (the syntax can refer to a UDF), + but semantically invalid for native functions. +*/ + +class Create_native_func : public Create_func +{ +public: + virtual Item *create_func(THD *thd, LEX_CSTRING *name, + List<Item> *item_list); + + /** + Builder method, with no arguments. 
+ @param thd The current thread + @param name The native function name + @param item_list The function parameters, none of which are named + @return An item representing the function call + */ + virtual Item *create_native(THD *thd, LEX_CSTRING *name, + List<Item> *item_list) = 0; + +protected: + /** Constructor. */ + Create_native_func() {} + /** Destructor. */ + virtual ~Create_native_func() {} +}; + + +/** Function builder for qualified functions. This builder is used with functions call using a qualified function name syntax, as in <code>db.func(expr, expr, ...)</code>. @@ -172,7 +206,14 @@ Item *create_temporal_literal(THD *thd, const String *str, type, send_error); } +struct Native_func_registry +{ + LEX_STRING name; + Create_func *builder; +}; + int item_create_init(); +int item_create_append(Native_func_registry array[]); void item_create_cleanup(); Item *create_func_dyncol_create(THD *thd, List<DYNCALL_CREATE_DEF> &list); diff --git a/sql/item_func.h b/sql/item_func.h index de213df0fc5..e8c9c0ef484 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -30,6 +30,9 @@ extern "C" /* Bug in BSDI include file */ } #endif +#include "sql_udf.h" // udf_handler +#include "my_decimal.h" // string2my_decimal + class Item_func :public Item_func_or_sum { diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 3a7684fe7b4..9399ec9d359 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -1687,6 +1687,15 @@ void Item_func_curtime_utc::store_now_in_TIME(THD *thd, MYSQL_TIME *now_time) */ } + +Item_func_now::Item_func_now(THD *thd, uint dec) : + Item_datetimefunc(thd, new (thd->mem_root) Item_decimal(thd, dec, TRUE)), + last_query_id(0) +{ + decimals = dec; +} + + bool Item_func_now::fix_fields(THD *thd, Item **items) { if (decimals > TIME_SECOND_PART_DIGITS) diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index 9c102e8a666..fc6d297d9ce 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -746,8 +746,7 @@ class Item_func_now :public 
Item_datetimefunc MYSQL_TIME ltime; query_id_t last_query_id; public: - Item_func_now(THD *thd, uint dec): Item_datetimefunc(thd), last_query_id(0) - { decimals= dec; } + Item_func_now(THD *thd, uint dec); bool fix_fields(THD *, Item **); void fix_length_and_dec() { fix_attributes_datetime(decimals); } bool get_date(MYSQL_TIME *res, ulonglong fuzzy_date); diff --git a/sql/item_vers.cc b/sql/item_vers.cc new file mode 100644 index 00000000000..5866afa4570 --- /dev/null +++ b/sql/item_vers.cc @@ -0,0 +1,259 @@ +/* Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + + +/** + @brief + System Versioning items +*/ + +#include "sql_class.h" +#include "tztime.h" +#include "item.h" + +Item_func_vtq_ts::Item_func_vtq_ts( + THD *thd, + handlerton* hton, + Item* a, + vtq_field_t _vtq_field) : + VTQ_common<Item_datetimefunc>(thd, hton, a), + vtq_field(_vtq_field) +{ + decimals= 6; + null_value= true; + DBUG_ASSERT(arg_count == 1 && args[0]); + check_hton(); +} + +template <class Item_func_X> +void +VTQ_common<Item_func_X>::check_hton() +{ + DBUG_ASSERT(hton); + if (!(hton->flags & HTON_NATIVE_SYS_VERSIONING) && hton->db_type != DB_TYPE_HEAP) + { + my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), Item::name.str ? 
Item::name.str : this->func_name()); + hton= NULL; + } +} + +bool +Item_func_vtq_ts::get_date(MYSQL_TIME *res, ulonglong fuzzy_date) +{ + THD *thd= current_thd; // can it differ from constructor's? + DBUG_ASSERT(thd); + DBUG_ASSERT(args[0]); + if (args[0]->result_type() != INT_RESULT) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), + func_name()); + return true; + } + ulonglong trx_id= args[0]->val_uint(); + if (trx_id == ULONGLONG_MAX) + { + null_value= false; + thd->variables.time_zone->gmt_sec_to_TIME(res, TIMESTAMP_MAX_VALUE); + res->second_part= TIME_MAX_SECOND_PART; + return false; + } + + DBUG_ASSERT(hton && hton->vers_query_trx_id); + null_value= !hton->vers_query_trx_id(thd, res, trx_id, vtq_field); + if (null_value) + { + my_error(ER_VERS_NO_TRX_ID, MYF(0), trx_id); + } + + return null_value; +} + + +Item_func_vtq_id::Item_func_vtq_id( + THD *thd, + handlerton *hton, + Item* a, + vtq_field_t _vtq_field, + bool _backwards) : + VTQ_common<Item_longlong_func>(thd, hton, a), + vtq_field(_vtq_field), + backwards(_backwards) +{ + memset(&cached_result, 0, sizeof(cached_result)); + decimals= 0; + unsigned_flag= 1; + null_value= true; + DBUG_ASSERT(arg_count == 1 && args[0]); + check_hton(); +} + +Item_func_vtq_id::Item_func_vtq_id( + THD *thd, + handlerton *hton, + Item* a, + Item* b, + vtq_field_t _vtq_field) : + VTQ_common<Item_longlong_func>(thd, hton, a, b), + vtq_field(_vtq_field), + backwards(false) +{ + memset(&cached_result, 0, sizeof(cached_result)); + decimals= 0; + unsigned_flag= 1; + null_value= true; + DBUG_ASSERT(arg_count == 2 && args[0] && args[1]); + check_hton(); +} + +longlong +Item_func_vtq_id::get_by_trx_id(ulonglong trx_id) +{ + ulonglong res; + THD *thd= current_thd; // can it differ from constructor's? 
+ DBUG_ASSERT(thd); + + if (trx_id == ULONGLONG_MAX) + { + null_value= true; + return 0; + } + + DBUG_ASSERT(hton->vers_query_trx_id); + null_value= !hton->vers_query_trx_id(thd, &res, trx_id, vtq_field); + return res; +} + +longlong +Item_func_vtq_id::get_by_commit_ts(MYSQL_TIME &commit_ts, bool backwards) +{ + THD *thd= current_thd; // can it differ from constructor's? + DBUG_ASSERT(thd); + + DBUG_ASSERT(hton->vers_query_commit_ts); + null_value= !hton->vers_query_commit_ts(thd, &cached_result, commit_ts, VTQ_ALL, backwards); + if (null_value) + { + return 0; + } + + switch (vtq_field) + { + case VTQ_COMMIT_ID: + return cached_result.commit_id; + case VTQ_ISO_LEVEL: + return cached_result.iso_level; + case VTQ_TRX_ID: + return cached_result.trx_id; + default: + DBUG_ASSERT(0); + null_value= true; + } + + return 0; +} + +longlong +Item_func_vtq_id::val_int() +{ + if (!hton) + { + null_value= true; + return 0; + } + + if (args[0]->is_null()) + { + if (arg_count < 2 || vtq_field == VTQ_TRX_ID) + { + null_value= true; + return 0; + } + return get_by_trx_id(args[1]->val_uint()); + } + else + { + MYSQL_TIME commit_ts; + if (args[0]->get_date(&commit_ts, 0)) + { + null_value= true; + return 0; + } + if (arg_count > 1) + { + backwards= args[1]->val_bool(); + DBUG_ASSERT(arg_count == 2); + } + return get_by_commit_ts(commit_ts, backwards); + } +} + +Item_func_vtq_trx_sees::Item_func_vtq_trx_sees( + THD *thd, + handlerton *hton, + Item* a, + Item* b) : + VTQ_common<Item_bool_func>(thd, hton, a, b), + accept_eq(false) +{ + null_value= true; + DBUG_ASSERT(arg_count == 2 && args[0] && args[1]); +} + +longlong +Item_func_vtq_trx_sees::val_int() +{ + THD *thd= current_thd; + DBUG_ASSERT(thd); + + if (!hton) + { + null_value= true; + return 0; + } + + ulonglong trx_id1, trx_id0; + ulonglong commit_id1= 0; + ulonglong commit_id0= 0; + uchar iso_level1= 0; + + DBUG_ASSERT(arg_count > 1); + trx_id1= args[0]->val_uint(); + trx_id0= args[1]->val_uint(); + + vtq_record_t *cached= 
args[0]->vtq_cached_result(); + if (cached && cached->commit_id) + { + commit_id1= cached->commit_id; + iso_level1= cached->iso_level; + } + + cached= args[1]->vtq_cached_result(); + if (cached && cached->commit_id) + { + commit_id0= cached->commit_id; + } + + if (accept_eq && trx_id1 && trx_id1 == trx_id0) + { + null_value= false; + return true; + } + + DBUG_ASSERT(hton->vers_trx_sees); + bool result= false; + null_value= !hton->vers_trx_sees(thd, result, trx_id1, trx_id0, commit_id1, iso_level1, commit_id0); + return result; +} diff --git a/sql/item_vers.h b/sql/item_vers.h new file mode 100644 index 00000000000..c2b127af817 --- /dev/null +++ b/sql/item_vers.h @@ -0,0 +1,137 @@ +#ifndef ITEM_VERS_INCLUDED +#define ITEM_VERS_INCLUDED +/* Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + + +/* System Versioning items */ + +#include "vtq.h" + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +template <class Item_func_X> +class VTQ_common : public Item_func_X +{ +protected: + handlerton *hton; + void check_hton(); +public: + VTQ_common(THD *thd, handlerton* _hton, Item* a, Item* b) : + Item_func_X(thd, a, b), + hton(_hton) {} + VTQ_common(THD *thd, handlerton* _hton, Item* a) : + Item_func_X(thd, a), + hton(_hton) {} +}; + +class Item_func_vtq_ts : + public VTQ_common<Item_datetimefunc> +{ + vtq_field_t vtq_field; +public: + Item_func_vtq_ts(THD *thd, handlerton *hton, Item* a, vtq_field_t _vtq_field); + const char *func_name() const + { + if (vtq_field == VTQ_BEGIN_TS) + { + return "vtq_begin_ts"; + } + return "vtq_commit_ts"; + } + bool get_date(MYSQL_TIME *res, ulonglong fuzzy_date); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_vtq_ts>(thd, mem_root, this); } + void fix_length_and_dec() { fix_attributes_datetime(decimals); } +}; + +class Item_func_vtq_id : + public VTQ_common<Item_longlong_func> +{ + vtq_field_t vtq_field; + vtq_record_t cached_result; + bool backwards; + + longlong get_by_trx_id(ulonglong trx_id); + longlong get_by_commit_ts(MYSQL_TIME &commit_ts, bool backwards); + +public: + Item_func_vtq_id(THD *thd, handlerton *hton, Item* a, vtq_field_t _vtq_field, bool _backwards= false); + Item_func_vtq_id(THD *thd, handlerton *hton, Item* a, Item* b, vtq_field_t _vtq_field); + + vtq_record_t *vtq_cached_result() { return &cached_result; } + + const char *func_name() const + { + switch (vtq_field) + { + case VTQ_TRX_ID: + return "vtq_trx_id"; + case VTQ_COMMIT_ID: + return "vtq_commit_id"; + case VTQ_ISO_LEVEL: + return "vtq_iso_level"; + default: + 
DBUG_ASSERT(0); + } + return NULL; + } + + void fix_length_and_dec() + { + Item_int_func::fix_length_and_dec(); + max_length= 20; + } + + longlong val_int(); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_vtq_id>(thd, mem_root, this); } +}; + +class Item_func_vtq_trx_sees : + public VTQ_common<Item_bool_func> +{ +protected: + bool accept_eq; + +public: + Item_func_vtq_trx_sees(THD *thd, handlerton *hton, Item* a, Item* b); + const char *func_name() const + { + return "vtq_trx_sees"; + } + longlong val_int(); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_vtq_trx_sees>(thd, mem_root, this); } +}; + +class Item_func_vtq_trx_sees_eq : + public Item_func_vtq_trx_sees +{ +public: + Item_func_vtq_trx_sees_eq(THD *thd, handlerton *hton, Item* a, Item* b) : + Item_func_vtq_trx_sees(thd, hton, a, b) + { + accept_eq= true; + } + const char *func_name() const + { + return "vtq_trx_sees_eq"; + } +}; + +#endif /* ITEM_VERS_INCLUDED */ diff --git a/sql/lex.h b/sql/lex.h index ef03afb7a32..68c98a95a7b 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -428,6 +428,7 @@ static SYMBOL symbols[] = { { "NONE", SYM(NONE_SYM)}, { "NOT", SYM(NOT_SYM)}, { "NOTFOUND", SYM(NOTFOUND_SYM)}, + { "NOW", SYM(NOW_SYM)}, { "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)}, { "NULL", SYM(NULL_SYM)}, { "NUMBER", SYM(NUMBER_SYM)}, @@ -458,6 +459,7 @@ static SYMBOL symbols[] = { { "PAGE_CHECKSUM", SYM(PAGE_CHECKSUM_SYM)}, { "PARSER", SYM(PARSER_SYM)}, { "PARSE_VCOL_EXPR", SYM(PARSE_VCOL_EXPR_SYM)}, + { "PERIOD", SYM(PERIOD_SYM)}, { "PARTIAL", SYM(PARTIAL)}, { "PARTITION", SYM(PARTITION_SYM)}, { "PARTITIONING", SYM(PARTITIONING_SYM)}, @@ -627,6 +629,8 @@ static SYMBOL symbols[] = { { "SUSPEND", SYM(SUSPEND_SYM)}, { "SWAPS", SYM(SWAPS_SYM)}, { "SWITCHES", SYM(SWITCHES_SYM)}, + { "SYSTEM", SYM(SYSTEM)}, + { "SYSTEM_TIME", SYM(SYSTEM_TIME_SYM)}, { "TABLE", SYM(TABLE_SYM)}, { "TABLE_NAME", SYM(TABLE_NAME_SYM)}, { "TABLES", SYM(TABLES)}, @@ -692,6 
+696,7 @@ static SYMBOL symbols[] = { { "VIA", SYM(VIA_SYM)}, { "VIEW", SYM(VIEW_SYM)}, { "VIRTUAL", SYM(VIRTUAL_SYM)}, + { "VERSIONING", SYM(VERSIONING_SYM)}, { "WAIT", SYM(WAIT_SYM)}, { "WARNINGS", SYM(WARNINGS)}, { "WEEK", SYM(WEEK_SYM)}, @@ -701,6 +706,7 @@ static SYMBOL symbols[] = { { "WHILE", SYM(WHILE_SYM)}, { "WINDOW", SYM(WINDOW_SYM)}, { "WITH", SYM(WITH)}, + { "WITHOUT", SYM(WITHOUT)}, { "WORK", SYM(WORK_SYM)}, { "WRAPPER", SYM(WRAPPER_SYM)}, { "WRITE", SYM(WRITE_SYM)}, @@ -738,7 +744,6 @@ static SYMBOL sql_functions[] = { { "MAX", SYM(MAX_SYM)}, { "MID", SYM(SUBSTRING)}, /* unireg function */ { "MIN", SYM(MIN_SYM)}, - { "NOW", SYM(NOW_SYM)}, { "NTH_VALUE", SYM(NTH_VALUE_SYM)}, { "NTILE", SYM(NTILE_SYM)}, { "POSITION", SYM(POSITION_SYM)}, diff --git a/sql/log_event.cc b/sql/log_event.cc index f1ceaec6456..102fbc251e3 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -44,6 +44,7 @@ #include <strfunc.h> #include "compat56.h" #include "wsrep_mysqld.h" +#include "sql_insert.h" #endif /* MYSQL_CLIENT */ #include <my_bitmap.h> @@ -12504,6 +12505,22 @@ Rows_log_event::write_row(rpl_group_info *rgi, DBUG_RETURN(HA_ERR_GENERIC); // in case if error is not set yet } + // Handle INSERT. + // Set vers fields when replicating from not system-versioned table. + if (m_type == WRITE_ROWS_EVENT_V1 && table->versioned_by_sql()) + { + bitmap_set_bit(table->read_set, table->vers_start_field()->field_index); + // Check whether a row came from unversioned table and fix vers fields. + if (table->vers_start_field()->get_timestamp() == 0) + { + bitmap_set_bit(table->write_set, table->vers_start_field()->field_index); + bitmap_set_bit(table->write_set, table->vers_end_field()->field_index); + thd->set_current_time(); + table->vers_start_field()->set_time(); + table->vers_end_field()->set_max(); + } + } + /* Try to write record. If a corresponding record already exists in the table, we try to change it using ha_update_row() if possible. 
Otherwise we delete @@ -12824,7 +12841,10 @@ static bool record_compare(TABLE *table) /* Compare fields */ for (Field **ptr=table->field ; *ptr ; ptr++) { - + if (table->versioned() && (*ptr)->vers_sys_field()) + { + continue; + } /** We only compare field contents that are not null. NULL fields (i.e., their null bits) were compared @@ -13019,6 +13039,27 @@ int Rows_log_event::find_row(rpl_group_info *rgi) prepare_record(table, m_width, FALSE); error= unpack_current_row(rgi); + m_vers_from_plain= false; + if (table->versioned()) + { + Field *sys_trx_end= table->vers_end_field(); + DBUG_ASSERT(table->read_set); + bitmap_set_bit(table->read_set, sys_trx_end->field_index); + // check whether master table is unversioned + if (sys_trx_end->val_int() == 0) + { + // sys_trx_start initialized with NULL when came from plain table. + // Set it notnull() because record_compare() count NULLs. + table->vers_start_field()->set_notnull(); + bitmap_set_bit(table->write_set, sys_trx_end->field_index); + // Plain source table may have a PRIMARY KEY. And sys_trx_end is always + // a part of PRIMARY KEY. Set it to max value for engine to find it in + // index. Needed for an UPDATE/DELETE cases. 
+ table->vers_end_field()->set_max(); + m_vers_from_plain= true; + } + } + DBUG_PRINT("info",("looking for the following record")); DBUG_DUMP("record[0]", table->record[0], table->s->reclength); @@ -13400,7 +13441,19 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) if (!error) { m_table->mark_columns_per_binlog_row_image(); - error= m_table->file->ha_delete_row(m_table->record[0]); + if (m_vers_from_plain && m_table->versioned_by_sql()) + { + Field *end= m_table->vers_end_field(); + bitmap_set_bit(m_table->write_set, end->field_index); + store_record(m_table, record[1]); + end->set_time(); + error= m_table->file->ha_update_row(m_table->record[1], + m_table->record[0]); + } + else + { + error= m_table->file->ha_delete_row(m_table->record[0]); + } m_table->default_column_bitmaps(); } if (invoke_triggers && !error && @@ -13657,9 +13710,22 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) memcpy(m_table->write_set->bitmap, m_cols_ai.bitmap, (m_table->write_set->n_bits + 7) / 8); m_table->mark_columns_per_binlog_row_image(); + if (m_vers_from_plain && m_table->versioned_by_sql()) + { + bitmap_set_bit(m_table->write_set, + m_table->vers_start_field()->field_index); + thd->set_current_time(); + m_table->vers_start_field()->set_time(); + } error= m_table->file->ha_update_row(m_table->record[1], m_table->record[0]); if (error == HA_ERR_RECORD_IS_THE_SAME) error= 0; + if (m_vers_from_plain && m_table->versioned_by_sql()) + { + store_record(m_table, record[2]); + error= vers_insert_history_row(m_table); + restore_record(m_table, record[2]); + } m_table->default_column_bitmaps(); if (invoke_triggers && !error && diff --git a/sql/log_event.h b/sql/log_event.h index c8f3241cb3d..2dedd8bbe9a 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -4594,6 +4594,8 @@ protected: uchar *m_extra_row_data; /* Pointer to extra row data if any */ /* If non null, first byte is length */ + bool m_vers_from_plain; + /* helper functions */ @@ -4744,6 +4746,7 @@ public: 
__attribute__((unused)), const uchar *after_record) { + DBUG_ASSERT(!table->versioned_by_engine()); return thd->binlog_write_row(table, is_transactional, after_record); } #endif @@ -4825,6 +4828,7 @@ public: const uchar *before_record, const uchar *after_record) { + DBUG_ASSERT(!table->versioned_by_engine()); return thd->binlog_update_row(table, is_transactional, before_record, after_record); } @@ -4914,6 +4918,7 @@ public: const uchar *after_record __attribute__((unused))) { + DBUG_ASSERT(!table->versioned_by_engine()); return thd->binlog_delete_row(table, is_transactional, before_record); } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index ad27c58f67f..14cfc7e9e5a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -943,6 +943,9 @@ PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered, key_LOCK_slave_background; PSI_mutex_key key_TABLE_SHARE_LOCK_share; +PSI_mutex_key key_TABLE_SHARE_LOCK_rotation; +PSI_cond_key key_TABLE_SHARE_COND_rotation; + static PSI_mutex_info all_server_mutexes[]= { #ifdef HAVE_MMAP @@ -1004,6 +1007,7 @@ static PSI_mutex_info all_server_mutexes[]= { &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0}, { &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0}, { &key_TABLE_SHARE_LOCK_share, "TABLE_SHARE::LOCK_share", 0}, + { &key_TABLE_SHARE_LOCK_rotation, "TABLE_SHARE::LOCK_rotation", 0}, { &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL}, { &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL}, { &key_LOCK_after_binlog_sync, "LOCK_after_binlog_sync", PSI_FLAG_GLOBAL}, @@ -1024,8 +1028,8 @@ static PSI_mutex_info all_server_mutexes[]= PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock, - key_LOCK_SEQUENCE; - + key_LOCK_SEQUENCE, + key_rwlock_LOCK_vers_stats, key_rwlock_LOCK_stat_serial; static PSI_rwlock_info all_server_rwlocks[]= { 
@@ -1038,7 +1042,9 @@ static PSI_rwlock_info all_server_rwlocks[]= { &key_rwlock_LOCK_sys_init_slave, "LOCK_sys_init_slave", PSI_FLAG_GLOBAL}, { &key_LOCK_SEQUENCE, "LOCK_SEQUENCE", 0}, { &key_rwlock_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL}, - { &key_rwlock_query_cache_query_lock, "Query_cache_query::lock", 0} + { &key_rwlock_query_cache_query_lock, "Query_cache_query::lock", 0}, + { &key_rwlock_LOCK_vers_stats, "Vers_field_stats::lock", 0}, + { &key_rwlock_LOCK_stat_serial, "TABLE_SHARE::LOCK_stat_serial", 0} }; #ifdef HAVE_MMAP @@ -1121,7 +1127,8 @@ static PSI_cond_info all_server_conds[]= { &key_COND_slave_background, "COND_slave_background", 0}, { &key_COND_start_thread, "COND_start_thread", PSI_FLAG_GLOBAL}, { &key_COND_wait_gtid, "COND_wait_gtid", 0}, - { &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0} + { &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0}, + { &key_TABLE_SHARE_COND_rotation, "TABLE_SHARE::COND_rotation", 0} }; PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, @@ -9432,6 +9439,17 @@ mysqld_get_one_option(int optid, const struct my_option *opt, char *argument) WSREP_SYNC_WAIT_BEFORE_READ); break; #endif /* WITH_WSREP */ + case OPT_VERS_CURRENT_TIME: + sys_var *var= static_cast<sys_var*>(opt->app_type); + DBUG_ASSERT(var); + if (var->option_updated()) + { + sql_print_error("Can't start server: " + "cannot process --vers-current-time=%.*s", + FN_REFLEN, argument); + return 1; + } + break; } return 0; } diff --git a/sql/mysqld.h b/sql/mysqld.h index 8364eecdb7c..6f5550d20ae 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -179,6 +179,48 @@ extern char *opt_backup_history_logname, *opt_backup_progress_logname, *opt_backup_settings_name; extern const char *log_output_str; extern const char *log_backup_output_str; + +/* System Versioning begin */ +enum vers_range_type_t +{ + FOR_SYSTEM_TIME_UNSPECIFIED = 0, + FOR_SYSTEM_TIME_AS_OF, + FOR_SYSTEM_TIME_FROM_TO, + 
FOR_SYSTEM_TIME_BETWEEN, + FOR_SYSTEM_TIME_ALL, + FOR_SYSTEM_TIME_BEFORE +}; + +/* Used only for @@versioning_current_time sysvar. This struct must be POD + * because of str_value, which is used as interface to user. + * So no virtual-anything! */ +struct st_vers_current_time +{ + char *str_value; // must be first + vers_range_type_t type; + MYSQL_TIME ltime; + st_vers_current_time() : + str_value(NULL), + type(FOR_SYSTEM_TIME_UNSPECIFIED) + {} +}; + +enum vers_hide_enum +{ + VERS_HIDE_AUTO= 0, + VERS_HIDE_IMPLICIT, + VERS_HIDE_FULL, + VERS_HIDE_NEVER +}; + +enum vers_alter_history_enum +{ + VERS_ALTER_HISTORY_KEEP= 0, + VERS_ALTER_HISTORY_SURVIVE, + VERS_ALTER_HISTORY_DROP +}; +/* System Versioning end */ + extern char *mysql_home_ptr, *pidfile_name_ptr; extern MYSQL_PLUGIN_IMPORT char glob_hostname[FN_REFLEN]; extern char mysql_home[FN_REFLEN]; @@ -313,13 +355,16 @@ extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, extern PSI_mutex_key key_TABLE_SHARE_LOCK_share, key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, - key_LOCK_global_index_stats, key_LOCK_wakeup_ready, key_LOCK_wait_commit; + key_LOCK_global_index_stats, key_LOCK_wakeup_ready, key_LOCK_wait_commit, + key_TABLE_SHARE_LOCK_rotation; extern PSI_mutex_key key_LOCK_gtid_waiting; extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock, - key_LOCK_SEQUENCE; + key_LOCK_SEQUENCE, + key_rwlock_LOCK_vers_stats, key_rwlock_LOCK_stat_serial; + #ifdef HAVE_MMAP extern PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool; #endif /* HAVE_MMAP */ @@ -347,6 +392,7 @@ extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_queue, key_COND_rpl_thread_stop, key_COND_rpl_thread_pool, key_COND_parallel_entry, key_COND_group_commit_orderer; extern PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates; 
+extern PSI_cond_key key_TABLE_SHARE_COND_rotation; extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, key_thread_handle_manager, key_thread_kill_server, key_thread_main, @@ -649,6 +695,7 @@ enum options_mysqld OPT_SSL_KEY, OPT_THREAD_CONCURRENCY, OPT_WANT_CORE, + OPT_VERS_CURRENT_TIME, #ifdef WITH_WSREP OPT_WSREP_CAUSAL_READS, OPT_WSREP_SYNC_WAIT, diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 3180c509737..675064067fc 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3454,6 +3454,13 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) free_root(&alloc,MYF(0)); // Return memory & allocator DBUG_RETURN(FALSE); } + + if (part_info->part_type == VERSIONING_PARTITION && + part_info->vers_update_range_constants(thd)) + { + retval= TRUE; + goto end2; + } dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set); @@ -3554,6 +3561,7 @@ all_used: mark_all_partitions_as_used(prune_param.part_info); end: dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets); +end2: thd->no_errors=0; thd->mem_root= range_par->old_root; free_root(&alloc,MYF(0)); // Return memory & allocator @@ -3980,7 +3988,7 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) simply set res= -1 as if the mapper had returned that. TODO: What to do here is defined in WL#4065. 
*/ - if (ppar->arg_stack[0]->part == 0) + if (ppar->arg_stack[0]->part == 0 || ppar->part_info->part_type == VERSIONING_PARTITION) { uint32 i; uint32 store_length_array[MAX_KEY]; diff --git a/sql/partition_element.h b/sql/partition_element.h index c774994b7f5..fc486abffc2 100644 --- a/sql/partition_element.h +++ b/sql/partition_element.h @@ -26,7 +26,8 @@ enum partition_type { NOT_A_PARTITION= 0, RANGE_PARTITION, HASH_PARTITION, - LIST_PARTITION + LIST_PARTITION, + VERSIONING_PARTITION }; enum partition_state { @@ -89,8 +90,74 @@ typedef struct p_elem_val struct st_ddl_log_memory_entry; -class partition_element :public Sql_alloc { +/* Used for collecting MIN/MAX stats on sys_trx_end for doing pruning + in SYSTEM_TIME partitiong. */ +class Vers_min_max_stats : public Sql_alloc +{ + static const uint buf_size= 4 + (TIME_SECOND_PART_DIGITS + 1) / 2; + uchar min_buf[buf_size]; + uchar max_buf[buf_size]; + Field_timestampf min_value; + Field_timestampf max_value; + mysql_rwlock_t lock; + +public: + Vers_min_max_stats(const LEX_CSTRING *field_name, TABLE_SHARE *share) : + min_value(min_buf, NULL, 0, Field::NONE, field_name, share, 6), + max_value(max_buf, NULL, 0, Field::NONE, field_name, share, 6) + { + min_value.set_max(); + memset(max_buf, 0, buf_size); + mysql_rwlock_init(key_rwlock_LOCK_vers_stats, &lock); + } + ~Vers_min_max_stats() + { + mysql_rwlock_destroy(&lock); + } + bool update_unguarded(Field *from) + { + return + from->update_min(&min_value, false) + + from->update_max(&max_value, false); + } + bool update(Field *from) + { + mysql_rwlock_wrlock(&lock); + bool res= update_unguarded(from); + mysql_rwlock_unlock(&lock); + return res; + } + my_time_t min_time() + { + mysql_rwlock_rdlock(&lock); + my_time_t res= min_value.get_timestamp(); + mysql_rwlock_unlock(&lock); + return res; + } + my_time_t max_time() + { + mysql_rwlock_rdlock(&lock); + my_time_t res= max_value.get_timestamp(); + mysql_rwlock_unlock(&lock); + return res; + } +}; + +enum stat_trx_field +{ 
+ STAT_TRX_END= 0 +}; + +class partition_element :public Sql_alloc +{ public: + enum elem_type + { + CONVENTIONAL= 0, + AS_OF_NOW, + VERSIONING + }; + List<partition_element> subpartitions; List<part_elem_value> list_val_list; ha_rows part_max_rows; @@ -109,6 +176,21 @@ public: bool has_null_value; bool signed_flag; // Range value signed bool max_value; // MAXVALUE range + uint32 id; + bool empty; + + // TODO: subclass partition_element by partitioning type to avoid such semantic + // mixup + elem_type type() + { + return (elem_type)(signed_flag << 1 | max_value); + } + + void type(elem_type val) + { + max_value= val & 1; + signed_flag= val & 2; + } partition_element() : part_max_rows(0), part_min_rows(0), range_value(0), @@ -117,9 +199,10 @@ public: data_file_name(NULL), index_file_name(NULL), engine_type(NULL), connect_string(null_clex_str), part_state(PART_NORMAL), nodegroup_id(UNDEF_NODEGROUP), has_null_value(FALSE), - signed_flag(FALSE), max_value(FALSE) - { - } + signed_flag(FALSE), max_value(FALSE), + id(UINT32_MAX), + empty(true) + {} partition_element(partition_element *part_elem) : part_max_rows(part_elem->part_max_rows), part_min_rows(part_elem->part_min_rows), @@ -132,10 +215,20 @@ public: connect_string(null_clex_str), part_state(part_elem->part_state), nodegroup_id(part_elem->nodegroup_id), - has_null_value(FALSE) + has_null_value(FALSE), + id(part_elem->id), + empty(part_elem->empty) + {} + ~partition_element() {} + + part_column_list_val& get_col_val(uint idx) { + DBUG_ASSERT(type() == CONVENTIONAL || list_val_list.elements == 1); + part_elem_value *ev= list_val_list.head(); + DBUG_ASSERT(ev); + DBUG_ASSERT(ev->col_val_array); + return ev->col_val_array[idx]; } - ~partition_element() {} }; #endif /* PARTITION_ELEMENT_INCLUDED */ diff --git a/sql/partition_info.cc b/sql/partition_info.cc index 6fdbdfce893..a06179fe132 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -21,6 +21,8 @@ #endif #include "mariadb.h" +#include <my_global.h> 
+#include <tztime.h> #include "sql_priv.h" // Required to get server definitions for mysql/plugin.h right #include "sql_plugin.h" @@ -30,6 +32,7 @@ #include "sql_parse.h" #include "sql_acl.h" // *_ACL #include "sql_base.h" // fill_record +#include "sql_statistics.h" // vers_stat_end #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" @@ -42,13 +45,12 @@ partition_info *partition_info::get_clone(THD *thd) List_iterator<partition_element> part_it(partitions); partition_element *part; - partition_info *clone= new (mem_root) partition_info(); + partition_info *clone= new (mem_root) partition_info(*this); if (!clone) { mem_alloc_error(sizeof(partition_info)); DBUG_RETURN(NULL); } - memcpy(clone, this, sizeof(partition_info)); memset(&(clone->read_partitions), 0, sizeof(clone->read_partitions)); memset(&(clone->lock_partitions), 0, sizeof(clone->lock_partitions)); clone->bitmaps_are_initialized= FALSE; @@ -114,6 +116,19 @@ partition_info *partition_info::get_clone(THD *thd) part_clone->list_val_list.push_back(new_val, mem_root); } } + if (part_type == VERSIONING_PARTITION && vers_info) + { + // clone Vers_part_info; set now_part, hist_part + clone->vers_info= new (mem_root) Vers_part_info(*vers_info); + List_iterator<partition_element> it(clone->partitions); + while ((part= it++)) + { + if (vers_info->now_part && part->id == vers_info->now_part->id) + clone->vers_info->now_part= part; + else if (vers_info->hist_part && part->id == vers_info->hist_part->id) + clone->vers_info->hist_part= part; + } // while ((part= it++)) + } // if (part_type == VERSIONING_PARTITION ... DBUG_RETURN(clone); } @@ -200,6 +215,48 @@ bool partition_info::set_named_partition_bitmap(const char *part_name, @param table_list Table list pointing to table to prune. 
@return Operation status + @retval false Success + @retval true Failure +*/ +bool partition_info::set_read_partitions(List<char> *partition_names) +{ + DBUG_ENTER("partition_info::set_read_partitions"); + if (!partition_names || !partition_names->elements) + { + DBUG_RETURN(true); + } + + uint num_names= partition_names->elements; + List_iterator<char> partition_names_it(*partition_names); + uint i= 0; + /* + TODO: When adding support for FK in partitioned tables, the referenced + table must probably lock all partitions for read, and also write depending + of ON DELETE/UPDATE. + */ + bitmap_clear_all(&read_partitions); + + /* No check for duplicate names or overlapping partitions/subpartitions. */ + + DBUG_PRINT("info", ("Searching through partition_name_hash")); + do + { + char *part_name= partition_names_it++; + if (add_named_partition(part_name, strlen(part_name))) + DBUG_RETURN(true); + } while (++i < num_names); + DBUG_RETURN(false); +} + + + +/** + Prune away partitions not mentioned in the PARTITION () clause, + if used. + + @param table_list Table list pointing to table to prune. 
+ + @return Operation status @retval true Failure @retval false Success */ @@ -780,6 +837,437 @@ bool partition_info::has_unique_name(partition_element *element) DBUG_RETURN(TRUE); } +bool partition_info::vers_init_info(THD * thd) +{ + part_type= VERSIONING_PARTITION; + list_of_part_fields= TRUE; + column_list= TRUE; + num_columns= 1; + vers_info= new (thd->mem_root) Vers_part_info; + if (!vers_info) + { + mem_alloc_error(sizeof(Vers_part_info)); + return true; + } + return false; +} + +bool partition_info::vers_set_interval(const INTERVAL & i) +{ + if (i.neg || i.second_part) + return true; + + DBUG_ASSERT(vers_info); + + // TODO: INTERVAL conversion to seconds leads to mismatch with calendar intervals (MONTH and YEAR) + vers_info->interval= + i.second + + i.minute * 60 + + i.hour * 60 * 60 + + i.day * 24 * 60 * 60 + + i.month * 30 * 24 * 60 * 60 + + i.year * 365 * 30 * 24 * 60 * 60; + + if (vers_info->interval == 0) + return true; + + return false; +} + +bool partition_info::vers_set_limit(ulonglong limit) +{ + if (limit < 1) + return true; + + DBUG_ASSERT(vers_info); + + vers_info->limit= limit; + return false; +} + +partition_element* +partition_info::vers_part_rotate(THD * thd) +{ + DBUG_ASSERT(table && table->s); + DBUG_ASSERT(vers_info && vers_info->initialized()); + + if (table->s->hist_part_id >= vers_info->now_part->id - 1) + { + DBUG_ASSERT(table->s->hist_part_id == vers_info->now_part->id - 1); + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PART_FULL, + ER_THD(thd, WARN_VERS_PART_FULL), + vers_info->hist_part->partition_name); + return vers_info->hist_part; + } + + table->s->hist_part_id++; + const char* old_part_name= vers_info->hist_part->partition_name; + vers_hist_part(); + + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, + WARN_VERS_PART_ROTATION, + ER_THD(thd, WARN_VERS_PART_ROTATION), + old_part_name, + vers_info->hist_part->partition_name); + + return vers_info->hist_part; +} + +bool 
partition_info::vers_set_expression(THD *thd, partition_element *el, MYSQL_TIME& t) +{ + curr_part_elem= el; + init_column_part(thd); + el->list_val_list.empty(); + el->list_val_list.push_back(curr_list_val, thd->mem_root); + for (uint i= 0; i < num_columns; ++i) + { + part_column_list_val *col_val= add_column_value(thd); + if (el->type() == partition_element::AS_OF_NOW) + { + col_val->max_value= true; + col_val->item_expression= NULL; + col_val->column_value= NULL; + col_val->part_info= this; + col_val->fixed= 1; + continue; + } + Item *item_expression= new (thd->mem_root) Item_datetime_literal(thd, &t); + if (!item_expression) + return true; + /* We initialize col_val with bogus max value to make fix_partition_func() and check_range_constants() happy. + Later in vers_setup_stats() it is initialized with real stat value if there will be any. */ + /* FIXME: TIME_RESULT in col_val is expensive. It should be INT_RESULT + (got to be fixed when InnoDB is supported). */ + init_col_val(col_val, item_expression); + DBUG_ASSERT(item_expression == el->get_col_val(i).item_expression); + } // for (num_columns) + return false; +} + +bool partition_info::vers_setup_expression(THD * thd, uint32 alter_add) +{ + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + + if (!table->versioned()) + { + my_error(ER_VERSIONING_REQUIRED, MYF(0), table->s->table_name); + return true; + } + + if (alter_add) + { + DBUG_ASSERT(partitions.elements > alter_add + 1); + Vers_min_max_stats** old_array= table->s->stat_trx; + table->s->stat_trx= static_cast<Vers_min_max_stats**>( + alloc_root(&table->s->mem_root, sizeof(void *) * partitions.elements * num_columns)); + memcpy(table->s->stat_trx, old_array, sizeof(void *) * (partitions.elements - alter_add) * num_columns); + } + else + { + /* Prepare part_field_list */ + Field *sys_trx_end= table->vers_end_field(); + part_field_list.push_back(sys_trx_end->field_name.str, thd->mem_root); + DBUG_ASSERT(part_field_list.elements == num_columns); + // needed 
in handle_list_of_fields() + sys_trx_end->flags|= GET_FIXED_FIELDS_FLAG; + } + + List_iterator<partition_element> it(partitions); + partition_element *el; + MYSQL_TIME t; + memset(&t, 0, sizeof(t)); + my_time_t ts= TIMESTAMP_MAX_VALUE - partitions.elements; + uint32 id= 0; + while ((el= it++)) + { + DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL); + ++ts; + if (alter_add) + { + /* Non-empty historical partitions are left as is. */ + if (el->type() == partition_element::VERSIONING && !el->empty) + { + ++id; + continue; + } + /* Newly added element is inserted before AS_OF_NOW. */ + if (el->id == UINT32_MAX || el->type() == partition_element::AS_OF_NOW) + { + DBUG_ASSERT(table && table->s); + Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root) + Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s); + table->s->stat_trx[id * num_columns + STAT_TRX_END]= stat_trx_end; + el->id= id++; + if (el->type() == partition_element::AS_OF_NOW) + break; + goto set_expression; + } + /* Existing element expression is recalculated. */ + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + for (uint i= 0; i < num_columns; ++i) + { + part_column_list_val &col_val= el->get_col_val(i); + static_cast<Item_datetime_literal *>(col_val.item_expression)->set_time(&t); + col_val.fixed= 0; + } + ++id; + continue; + } + + set_expression: + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + if (vers_set_expression(thd, el, t)) + return true; + } + return false; +} + + +// scan table for min/max sys_trx_end +inline +bool partition_info::vers_scan_min_max(THD *thd, partition_element *part) +{ + uint32 sub_factor= num_subparts ? 
num_subparts : 1; + uint32 part_id= part->id * sub_factor; + uint32 part_id_end= part_id + sub_factor; + DBUG_ASSERT(part->empty); + DBUG_ASSERT(part->type() == partition_element::VERSIONING); + DBUG_ASSERT(table->s->stat_trx); + for (; part_id < part_id_end; ++part_id) + { + handler *file= table->file->part_handler(part_id); // requires update_partition() for ha_innopart + DBUG_ASSERT(file); + int rc= file->ha_external_lock(thd, F_RDLCK); // requires ha_commit_trans() for ha_innobase + if (rc) + { + file->update_partition(part_id); + goto lock_fail; + } + + table->default_column_bitmaps(); + bitmap_set_bit(table->read_set, table->vers_end_field()->field_index); + file->column_bitmaps_signal(); + + rc= file->ha_rnd_init(true); + if (!rc) + { + while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) + { + if (part->empty) + part->empty= false; + if (thd->killed) + { + file->ha_rnd_end(); + file->update_partition(part_id); + ha_commit_trans(thd, false); + return true; + } + if (rc) + { + if (rc == HA_ERR_RECORD_DELETED) + continue; + break; + } + if (table->vers_end_field()->is_max()) + { + rc= HA_ERR_INTERNAL_ERROR; + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PART_NON_HISTORICAL, + ER_THD(thd, WARN_VERS_PART_NON_HISTORICAL), + part->partition_name); + break; + } + if (table->versioned_by_engine()) + { + uchar buf[8]; + Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); + if (!vers_trx_id_to_ts(thd, table->vers_end_field(), fld)) + { + vers_stat_trx(STAT_TRX_END, part).update_unguarded(&fld); + } + } + else + { + vers_stat_trx(STAT_TRX_END, part).update_unguarded(table->vers_end_field()); + } + } + file->ha_rnd_end(); + } + file->ha_external_lock(thd, F_UNLCK); + file->update_partition(part_id); + if (rc != HA_ERR_END_OF_FILE) + { + ha_commit_trans(thd, false); + lock_fail: + // TODO: print rc code + my_error(ER_INTERNAL_ERROR, MYF(0), "min/max scan failed in versioned 
partitions setup (see warnings)"); + return true; + } + } + ha_commit_trans(thd, false); + return false; +} + +void partition_info::vers_update_col_vals(THD *thd, partition_element *el0, partition_element *el1) +{ + MYSQL_TIME t; + memset(&t, 0, sizeof(t)); + DBUG_ASSERT(table && table->s && table->s->stat_trx); + DBUG_ASSERT(!el0 || el1->id == el0->id + 1); + const uint idx= el1->id * num_columns; + my_time_t ts; + part_column_list_val *col_val; + Item_datetime_literal *val_item; + Vers_min_max_stats *stat_trx_x; + for (uint i= 0; i < num_columns; ++i) + { + stat_trx_x= table->s->stat_trx[idx + i]; + if (el0) + { + ts= stat_trx_x->min_time(); + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + col_val= &el0->get_col_val(i); + val_item= static_cast<Item_datetime_literal*>(col_val->item_expression); + DBUG_ASSERT(val_item); + if (*val_item > t) + { + val_item->set_time(&t); + col_val->fixed= 0; + } + } + col_val= &el1->get_col_val(i); + if (!col_val->max_value) + { + ts= stat_trx_x->max_time() + 1; + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + val_item= static_cast<Item_datetime_literal*>(col_val->item_expression); + DBUG_ASSERT(val_item); + if (*val_item < t) + { + val_item->set_time(&t); + col_val->fixed= 0; + } + } + } +} + + +// setup at open() phase (TABLE_SHARE is initialized) +bool partition_info::vers_setup_stats(THD * thd, bool is_create_table_ind) +{ + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + DBUG_ASSERT(vers_info && vers_info->initialized(false)); + DBUG_ASSERT(table && table->s); + + bool error= false; + + mysql_mutex_lock(&table->s->LOCK_rotation); + if (table->s->busy_rotation) + { + table->s->vers_wait_rotation(); + vers_hist_part(); + } + else + { + table->s->busy_rotation= true; + mysql_mutex_unlock(&table->s->LOCK_rotation); + + DBUG_ASSERT(part_field_list.elements == num_columns); + + bool dont_stat= true; + bool col_val_updated= false; + // initialize stat_trx + if (!table->s->stat_trx) + { + DBUG_ASSERT(partitions.elements > 
1); + table->s->stat_trx= static_cast<Vers_min_max_stats**>( + alloc_root(&table->s->mem_root, sizeof(void *) * partitions.elements * num_columns)); + dont_stat= false; + } + + // build freelist, scan min/max, assign hist_part + List_iterator<partition_element> it(partitions); + partition_element *el= NULL, *prev; + while ((prev= el, el= it++)) + { + if (el->type() == partition_element::VERSIONING && dont_stat) + { + if (el->id == table->s->hist_part_id) + { + vers_info->hist_part= el; + break; + } + continue; + } + + { + Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root) + Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s); + table->s->stat_trx[el->id * num_columns + STAT_TRX_END]= stat_trx_end; + } + + if (!is_create_table_ind) + { + if (el->type() == partition_element::AS_OF_NOW) + { + uchar buf[8]; + Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); + fld.set_max(); + vers_stat_trx(STAT_TRX_END, el).update_unguarded(&fld); + el->empty= false; + } + else if (vers_scan_min_max(thd, el)) + { + table->s->stat_trx= NULL; // may be a leak on endless table open + error= true; + break; + } + if (!el->empty) + { + vers_update_col_vals(thd, prev, el); + col_val_updated= true; + } + } + + if (el->type() == partition_element::AS_OF_NOW) + break; + + DBUG_ASSERT(el->type() == partition_element::VERSIONING); + + if (vers_info->hist_part) + { + if (!el->empty) + goto set_hist_part; + } + else + { + set_hist_part: + vers_info->hist_part= el; + continue; + } + } // while + + if (!error && !dont_stat) + { + if (col_val_updated) + table->s->stat_serial++; + + table->s->hist_part_id= vers_info->hist_part->id; + if (!is_create_table_ind && (vers_limit_exceed() || vers_interval_exceed())) + vers_part_rotate(thd); + } + mysql_mutex_lock(&table->s->LOCK_rotation); + mysql_cond_broadcast(&table->s->COND_rotation); + table->s->busy_rotation= false; + } + mysql_mutex_unlock(&table->s->LOCK_rotation); + return 
error; +} + /* Check that the partition/subpartition is setup to use the correct @@ -963,7 +1451,7 @@ error: called for RANGE PARTITIONed tables. */ -bool partition_info::check_range_constants(THD *thd) +bool partition_info::check_range_constants(THD *thd, bool alloc) { partition_element* part_def; bool first= TRUE; @@ -980,12 +1468,15 @@ bool partition_info::check_range_constants(THD *thd) part_column_list_val *UNINIT_VAR(current_largest_col_val); uint num_column_values= part_field_list.elements; uint size_entries= sizeof(part_column_list_val) * num_column_values; - range_col_array= (part_column_list_val*) thd->calloc(num_parts * - size_entries); - if (unlikely(range_col_array == NULL)) + if (alloc) { - mem_alloc_error(num_parts * size_entries); - goto end; + range_col_array= (part_column_list_val*) thd->calloc(num_parts * + size_entries); + if (unlikely(range_col_array == NULL)) + { + mem_alloc_error(num_parts * size_entries); + goto end; + } } loc_range_col_array= range_col_array; i= 0; @@ -1018,11 +1509,14 @@ bool partition_info::check_range_constants(THD *thd) longlong part_range_value; bool signed_flag= !part_expr->unsigned_flag; - range_int_array= (longlong*) thd->alloc(num_parts * sizeof(longlong)); - if (unlikely(range_int_array == NULL)) + if (alloc) { - mem_alloc_error(num_parts * sizeof(longlong)); - goto end; + range_int_array= (longlong*) thd->alloc(num_parts * sizeof(longlong)); + if (unlikely(range_int_array == NULL)) + { + mem_alloc_error(num_parts * sizeof(longlong)); + goto end; + } } i= 0; do @@ -1386,6 +1880,8 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, uint i, tot_partitions; bool result= TRUE, table_engine_set; const char *same_name; + uint32 hist_parts= 0; + uint32 now_parts= 0; DBUG_ENTER("partition_info::check_partition_info"); DBUG_ASSERT(default_engine_type != partition_hton); @@ -1427,7 +1923,8 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, } if 
(unlikely(is_sub_partitioned() && (!(part_type == RANGE_PARTITION || - part_type == LIST_PARTITION)))) + part_type == LIST_PARTITION || + part_type == VERSIONING_PARTITION)))) { /* Only RANGE and LIST partitioning can be subpartitioned */ my_error(ER_SUBPARTITION_ERROR, MYF(0)); @@ -1489,6 +1986,19 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, my_error(ER_SAME_NAME_PARTITION, MYF(0), same_name); goto end; } + + if (part_type == VERSIONING_PARTITION) + { + DBUG_ASSERT(vers_info); + if (num_parts < 2 || !vers_info->now_part) + { + DBUG_ASSERT(info && info->alias); + my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias); + goto end; + } + DBUG_ASSERT(vers_info->initialized(false)); + DBUG_ASSERT(num_parts == partitions.elements); + } i= 0; { List_iterator<partition_element> part_it(partitions); @@ -1569,6 +2079,18 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, } } } + if (part_type == VERSIONING_PARTITION) + { + if (part_elem->type() == partition_element::VERSIONING) + { + hist_parts++; + } + else + { + DBUG_ASSERT(part_elem->type() == partition_element::AS_OF_NOW); + now_parts++; + } + } } while (++i < num_parts); if (!table_engine_set && num_parts_not_set != 0 && @@ -1600,12 +2122,29 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, if (add_or_reorg_part) { - if (unlikely((part_type == RANGE_PARTITION && + if (unlikely(((part_type == RANGE_PARTITION || part_type == VERSIONING_PARTITION) && check_range_constants(thd)) || (part_type == LIST_PARTITION && check_list_constants(thd)))) goto end; } + + if (hist_parts > 1) + { + if (vers_info->limit == 0 && vers_info->interval == 0) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PARAMETERS, + ER_THD(thd, WARN_VERS_PARAMETERS), + "no rotation condition for multiple `VERSIONING` partitions."); + } + } + if (now_parts > 1) + { + my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias); + goto end; + } result= FALSE; 
end: DBUG_RETURN(result); @@ -2815,6 +3354,81 @@ bool partition_info::has_same_partitioning(partition_info *new_part_info) } +static bool has_same_column_order(List<Create_field> *create_list, + Field** field_array) +{ + Field **f_ptr; + List_iterator_fast<Create_field> new_field_it; + Create_field *new_field= NULL; + new_field_it.init(*create_list); + + for (f_ptr= field_array; *f_ptr; f_ptr++) + { + while ((new_field= new_field_it++)) + { + if (new_field->field == *f_ptr) + break; + } + if (!new_field) + break; + } + + if (!new_field) + { + /* Not same order!*/ + return false; + } + return true; +} + +bool partition_info::vers_trx_id_to_ts(THD* thd, Field* in_trx_id, Field_timestamp& out_ts) +{ + DBUG_ASSERT(table); + handlerton *hton= plugin_hton(table->s->db_plugin); + DBUG_ASSERT(hton); + ulonglong trx_id= in_trx_id->val_int(); + MYSQL_TIME ts; + bool found= hton->vers_query_trx_id(thd, &ts, trx_id, VTQ_COMMIT_TS); + if (!found) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_TRX_MISSING, + ER_THD(thd, WARN_VERS_TRX_MISSING), + trx_id); + return true; + } + out_ts.store_time_dec(&ts, 6); + return false; +} + + +/** + Check if the partitioning columns are in the same order as the given list. + + Used to see if INPLACE alter can be allowed or not. If the order is + different then the rows must be redistributed for KEY [sub]partitioning. + + @param[in] create_list Column list after ALTER TABLE. + @return true is same order as before ALTER TABLE, else false. +*/ +bool partition_info::same_key_column_order(List<Create_field> *create_list) +{ + /* Only need to check for KEY [sub] partitioning. 
*/ + if (list_of_part_fields && !column_list) + { + if (!has_same_column_order(create_list, part_field_array)) + return false; + } + if (list_of_subpart_fields) + { + if (!has_same_column_order(create_list, subpart_field_array)) + return false; + } + return true; +} + + void partition_info::print_debug(const char *str, uint *value) { DBUG_ENTER("print_debug"); diff --git a/sql/partition_info.h b/sql/partition_info.h index 38a353c8507..a8b1b386240 100644 --- a/sql/partition_info.h +++ b/sql/partition_info.h @@ -22,6 +22,7 @@ #include "sql_class.h" #include "partition_element.h" +#include "sql_partition.h" class partition_info; struct TABLE_LIST; @@ -34,6 +35,45 @@ typedef int (*get_subpart_id_func)(partition_info *part_info, struct st_ddl_log_memory_entry; +struct Vers_part_info : public Sql_alloc +{ + Vers_part_info() : + interval(0), + limit(0), + now_part(NULL), + hist_part(NULL), + stat_serial(0) + { + } + Vers_part_info(Vers_part_info &src) : + interval(src.interval), + limit(src.limit), + now_part(NULL), + hist_part(NULL), + stat_serial(src.stat_serial) + { + } + bool initialized(bool fully= true) + { + if (now_part) + { + DBUG_ASSERT(now_part->id != UINT32_MAX); + DBUG_ASSERT(now_part->type() == partition_element::AS_OF_NOW); + DBUG_ASSERT(!fully || (bool) hist_part); + DBUG_ASSERT(!hist_part || ( + hist_part->id != UINT32_MAX && + hist_part->type() == partition_element::VERSIONING)); + return true; + } + return false; + } + my_time_t interval; + ulonglong limit; + partition_element *now_part; + partition_element *hist_part; + ulonglong stat_serial; +}; + class partition_info : public Sql_alloc { public: @@ -143,6 +183,8 @@ public: part_column_list_val *range_col_array; part_column_list_val *list_col_array; }; + + Vers_part_info *vers_info; /******************************************** * INTERVAL ANALYSIS @@ -302,7 +344,7 @@ public: const char *find_duplicate_field(); char *find_duplicate_name(); bool check_engine_mix(handlerton *engine_type, bool 
default_engine); - bool check_range_constants(THD *thd); + bool check_range_constants(THD *thd, bool alloc= true); bool check_list_constants(THD *thd); bool check_partition_info(THD *thd, handlerton **eng_type, handler *file, HA_CREATE_INFO *info, @@ -332,6 +374,28 @@ public: size_t file_name_size, uint32 *part_id); void report_part_expr_error(bool use_subpart_expr); bool has_same_partitioning(partition_info *new_part_info); + inline bool is_partition_used(uint part_id) const + { + return bitmap_is_set(&read_partitions, part_id); + } + inline bool is_partition_locked(uint part_id) const + { + return bitmap_is_set(&lock_partitions, part_id); + } + inline uint num_partitions_used() + { + return bitmap_bits_set(&read_partitions); + } + inline uint get_first_used_partition() const + { + return bitmap_get_first_set(&read_partitions); + } + inline uint get_next_used_partition(uint part_id) const + { + return bitmap_get_next_set(&read_partitions, part_id); + } + bool same_key_column_order(List<Create_field> *create_list); + private: static int list_part_cmp(const void* a, const void* b); bool set_up_default_partitions(THD *thd, handler *file, HA_CREATE_INFO *info, @@ -342,10 +406,167 @@ private: uint start_no); char *create_default_subpartition_name(THD *thd, uint subpart_no, const char *part_name); + // FIXME: prune_partition_bitmaps() is duplicate of set_read_partitions() bool prune_partition_bitmaps(TABLE_LIST *table_list); bool add_named_partition(const char *part_name, uint length); public: + bool set_read_partitions(List<char> *partition_names); bool has_unique_name(partition_element *element); + + bool vers_init_info(THD *thd); + bool vers_set_interval(const INTERVAL &i); + bool vers_set_limit(ulonglong limit); + partition_element* vers_part_rotate(THD *thd); + bool vers_set_expression(THD *thd, partition_element *el, MYSQL_TIME &t); + bool vers_setup_expression(THD *thd, uint32 alter_add= 0); /* Stage 1. 
*/ + bool vers_setup_stats(THD *thd, bool is_create_table_ind); /* Stage 2. */ + bool vers_scan_min_max(THD *thd, partition_element *part); + void vers_update_col_vals(THD *thd, partition_element *el0, partition_element *el1); + + partition_element *vers_hist_part() + { + DBUG_ASSERT(table && table->s); + DBUG_ASSERT(vers_info && vers_info->initialized()); + DBUG_ASSERT(table->s->hist_part_id != UINT32_MAX); + if (table->s->hist_part_id == vers_info->hist_part->id) + return vers_info->hist_part; + + List_iterator<partition_element> it(partitions); + partition_element *el; + while ((el= it++)) + { + DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL); + if (el->type() == partition_element::VERSIONING && + el->id == table->s->hist_part_id) + { + vers_info->hist_part= el; + return vers_info->hist_part; + } + } + DBUG_ASSERT(0); + return NULL; + } + partition_element *get_partition(uint part_id) + { + List_iterator<partition_element> it(partitions); + partition_element *el; + while ((el= it++)) + { + if (el->id == part_id) + return el; + } + return NULL; + } + bool vers_limit_exceed(partition_element *part= NULL) + { + DBUG_ASSERT(vers_info); + if (!vers_info->limit) + return false; + if (!part) + { + DBUG_ASSERT(vers_info->initialized()); + part= vers_hist_part(); + } + // TODO: cache thread-shared part_recs and increment on INSERT + return table->file->part_records(part) >= vers_info->limit; + } + Vers_min_max_stats& vers_stat_trx(stat_trx_field fld, uint32 part_element_id) + { + DBUG_ASSERT(table && table->s && table->s->stat_trx); + Vers_min_max_stats* res= table->s->stat_trx[part_element_id * num_columns + fld]; + DBUG_ASSERT(res); + return *res; + } + Vers_min_max_stats& vers_stat_trx(stat_trx_field fld, partition_element *part) + { + DBUG_ASSERT(part); + return vers_stat_trx(fld, part->id); + } + bool vers_interval_exceed(my_time_t max_time, partition_element *part= NULL) + { + DBUG_ASSERT(vers_info); + if (!vers_info->interval) + return false; + if 
(!part) + { + DBUG_ASSERT(vers_info->initialized()); + part= vers_hist_part(); + } + my_time_t min_time= vers_stat_trx(STAT_TRX_END, part).min_time(); + return max_time - min_time > vers_info->interval; + } + bool vers_interval_exceed(partition_element *part) + { + return vers_interval_exceed(vers_stat_trx(STAT_TRX_END, part).max_time(), part); + } + bool vers_interval_exceed() + { + return vers_interval_exceed(vers_hist_part()); + } + bool vers_trx_id_to_ts(THD *thd, Field *in_trx_id, Field_timestamp &out_ts); + void vers_update_stats(THD *thd, partition_element *el) + { + DBUG_ASSERT(vers_info && vers_info->initialized()); + DBUG_ASSERT(table && table->s); + DBUG_ASSERT(el && el->type() == partition_element::VERSIONING); + bool updated; + mysql_rwlock_wrlock(&table->s->LOCK_stat_serial); + el->empty= false; + if (table->versioned_by_engine()) + { + // transaction is not yet pushed to VTQ, so we use now-time + my_time_t end_ts= my_time(0); + + uchar buf[8]; + Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); + fld.store_TIME(end_ts, 0); + updated= + vers_stat_trx(STAT_TRX_END, el->id).update(&fld); + } + else + { + updated= + vers_stat_trx(STAT_TRX_END, el->id).update(table->vers_end_field()); + } + if (updated) + table->s->stat_serial++; + mysql_rwlock_unlock(&table->s->LOCK_stat_serial); + if (updated) + { + vers_update_col_vals(thd, + el->id > 0 ? 
get_partition(el->id - 1) : NULL, + el); + } + } + void vers_update_stats(THD *thd, uint part_id) + { + DBUG_ASSERT(vers_info && vers_info->initialized()); + if (part_id < vers_info->now_part->id) + vers_update_stats(thd, get_partition(part_id)); + } + bool vers_update_range_constants(THD *thd) + { + DBUG_ASSERT(vers_info && vers_info->initialized()); + DBUG_ASSERT(table && table->s); + + mysql_rwlock_rdlock(&table->s->LOCK_stat_serial); + if (vers_info->stat_serial == table->s->stat_serial) + { + mysql_rwlock_unlock(&table->s->LOCK_stat_serial); + return false; + } + + bool result= false; + for (uint i= 0; i < num_columns; ++i) + { + Field *f= part_field_array[i]; + bitmap_set_bit(f->table->write_set, f->field_index); + } + result= check_range_constants(thd, false); + vers_info->stat_serial= table->s->stat_serial; + mysql_rwlock_unlock(&table->s->LOCK_stat_serial); + return result; + } }; uint32 get_next_partition_id_range(struct st_partition_iter* part_iter); diff --git a/sql/set_var.h b/sql/set_var.h index d0143e1e524..48990755ca0 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -224,6 +224,13 @@ private: virtual bool session_update(THD *thd, set_var *var) = 0; virtual bool global_update(THD *thd, set_var *var) = 0; +public: + virtual bool option_updated() + { + DBUG_ASSERT(false); + return true; + } + protected: /** A pointer to a value of the variable for SHOW. 
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index e414a674af6..a5b8a639b2f 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7786,3 +7786,110 @@ ER_COMPRESSED_COLUMN_USED_AS_KEY eng "Compressed column '%-.192s' can't be used in key specification" ER_UNKNOWN_COMPRESSION_METHOD eng "Unknown compression method: %s" + +# MariaDB error numbers related to System Versioning + +ER_VERSIONING_REQUIRED + eng "System Versioning required: %s" + +ER_VERS_READONLY_FIELD + eng "System field %`s is read-only" + +ER_UPDATE_INFO_WITH_SYSTEM_VERSIONING + eng "Rows matched: %ld Changed: %ld Inserted: %ld Warnings: %ld" + +ER_VERS_FIELD_WRONG_TYPE + eng "%`s must be of type %`s for versioned table %`s" + +ER_VERS_WRONG_PARAMS + eng "Wrong parameters %s" + +ER_VERS_ENGINE_UNSUPPORTED + eng "Engine does not support System Versioning for %`s" + +ER_VERS_RANGE_UNITS_MISMATCH + eng "Range units mismatch" + +ER_NON_VERSIONED_FIELD_IN_VERSIONED_QUERY + eng "Attempt to read unversioned field %`s in historical query" + +ER_PARTITION_WRONG_TYPE + eng "Wrong partition type, expected type: %`s" + +WARN_VERS_PART_FULL + eng "Using full partition %`s, need more VERSIONING partitions!" + +WARN_VERS_PARAMETERS + eng "Maybe missing parameters: %s" + +WARN_VERS_PART_ROTATION + eng "Switching from partition %`s to %`s" + +WARN_VERS_TRX_MISSING + eng "VTQ missing transaction ID %lu" + +WARN_VERS_PART_NON_HISTORICAL + eng "Partition %`s contains non-historical data" + +ER_VERS_NOT_ALLOWED + eng "%`s is not allowed for versioned table" + +ER_VERS_RANGE_PROHIBITED + eng "SYSTEM_TIME range selector is prohibited" + +ER_VERS_VIEW_PROHIBITED + eng "Creating VIEW %`s is prohibited!" + +ER_VERS_DERIVED_PROHIBITED + eng "Derived table is prohibited!" 
+ +ER_VERS_UNUSED_CLAUSE + eng "Unused clause: '%s'" + +WARN_VERS_ALIAS_TOO_LONG + eng "Auto generated alias for `%s.%s` is too long; using `%s`" + +ER_VERS_VTMD_ERROR + eng "VTMD error: %s" + +ER_NOT_ALLOWED + eng "for %`s: not allowed '%s'" + +ER_VERS_DIFFERENT_TABLES + eng "for %`s: system fields selected from different tables" + +ER_VERS_NO_COLS_DEFINED + eng "for %`s: no columns defined '%s'" + +ER_VERS_NOT_VERSIONED + eng "for %`s: table is not versioned" + +ER_MISSING + eng "for %`s: missing '%s'" + +ER_MISMATCH + eng "for %`s: mismatch '%s' and '%s'" + +ER_PART_WRONG_VALUE + eng "for partitioned %`s: wrong value for '%s'" + +ER_VERS_WRONG_PARTS + eng "Wrong partitions consistency for %`s: must have at least one 'VERSIONING' and exactly one last 'AS OF NOW'" + +ER_VERS_HISTORY_LOCK + eng "Versioned SELECT write-locking of history rows" + +ER_VERS_NO_TRX_ID + eng "TRX_ID %lu not found in VTQ" + +ER_WRONG_TABLESPACE_NAME 42000 + eng "Incorrect tablespace name `%-.192s`" + +ER_VERS_ALTER_SYSTEM_FIELD + eng "Can not change system versioning field '%s'" + +ER_VERS_SYS_FIELD_NOT_HIDDEN + eng "System versioning field '%s' is not hidden" + +ER_NOT_LOG_TABLE + eng "Table `%s.%s` is not a log table" diff --git a/sql/sp_cache.cc b/sql/sp_cache.cc index 342673bf619..cf890f316bc 100644 --- a/sql/sp_cache.cc +++ b/sql/sp_cache.cc @@ -238,6 +238,10 @@ void sp_cache_flush_obsolete(sp_cache **cp, sp_head **sp) } } +void sp_cache_flush(sp_cache *cp, sp_head *sp) +{ + cp->remove(sp); +} /** Return the current global version of the cache. 
diff --git a/sql/sp_cache.h b/sql/sp_cache.h index a045ff5d3c5..59e0fc186dd 100644 --- a/sql/sp_cache.h +++ b/sql/sp_cache.h @@ -60,6 +60,7 @@ void sp_cache_insert(sp_cache **cp, sp_head *sp); sp_head *sp_cache_lookup(sp_cache **cp, const Database_qualified_name *name); void sp_cache_invalidate(); void sp_cache_flush_obsolete(sp_cache **cp, sp_head **sp); +void sp_cache_flush(sp_cache *cp, sp_head *sp); ulong sp_cache_version(); void sp_cache_enforce_limit(sp_cache *cp, ulong upper_limit_for_elements); diff --git a/sql/sp_head.cc b/sql/sp_head.cc index df9471880d9..1fb780528c8 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -2530,6 +2530,7 @@ sp_head::restore_thd_mem_root(THD *thd) Item *flist= free_list; // The old list set_query_arena(thd); // Get new free_list and mem_root state= STMT_INITIALIZED_FOR_SP; + is_stored_procedure= true; DBUG_PRINT("info", ("mem_root %p returned from thd mem root %p", &mem_root, &thd->mem_root)); diff --git a/sql/sp_head.h b/sql/sp_head.h index 734c0dea3e3..815a68c922f 100644 --- a/sql/sp_head.h +++ b/sql/sp_head.h @@ -209,7 +209,7 @@ public: ulong sp_cache_version() const { return m_sp_cache_version; } /** Set the value of the SP cache version. */ - void set_sp_cache_version(ulong version_arg) + void set_sp_cache_version(ulong version_arg) const { m_sp_cache_version= version_arg; } @@ -231,7 +231,7 @@ private: is obsolete and should not be used -- sp_cache_flush_obsolete() will purge it. 
*/ - ulong m_sp_cache_version; + mutable ulong m_sp_cache_version; Stored_program_creation_ctx *m_creation_ctx; /** Boolean combination of (1<<flag), where flag is a member of diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 7e31d448bdf..4b9c0e325a3 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -170,6 +170,11 @@ TABLE_FIELD_TYPE mysql_db_table_fields[MYSQL_DB_FIELD_COUNT] = { { C_STRING_WITH_LEN("Trigger_priv") }, { C_STRING_WITH_LEN("enum('N','Y')") }, { C_STRING_WITH_LEN("utf8") } + }, + { + { C_STRING_WITH_LEN("Delete_versioning_rows_priv") }, + { C_STRING_WITH_LEN("enum('N','Y')") }, + { C_STRING_WITH_LEN("utf8") } } }; @@ -695,9 +700,9 @@ bool ROLE_GRANT_PAIR::init(MEM_ROOT *mem, const char *username, #endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */ #define NORMAL_HANDSHAKE_SIZE 6 -#define ROLE_ASSIGN_COLUMN_IDX 43 -#define DEFAULT_ROLE_COLUMN_IDX 44 -#define MAX_STATEMENT_TIME_COLUMN_IDX 45 +#define ROLE_ASSIGN_COLUMN_IDX 44 +#define DEFAULT_ROLE_COLUMN_IDX 45 +#define MAX_STATEMENT_TIME_COLUMN_IDX 46 /* various flags valid for ACL_USER */ #define IS_ROLE (1L << 0) @@ -2013,6 +2018,9 @@ static bool acl_load(THD *thd, const Grant_tables& tables) if (user_table.num_fields() <= 38 && (user.access & SUPER_ACL)) user.access|= TRIGGER_ACL; + if (user_table.num_fields() <= 46 && (user.access & DELETE_ACL)) + user.access|= DELETE_VERSIONING_ROWS_ACL; + user.sort= get_sort(2, user.host.hostname, user.user.str); user.hostname_length= safe_strlen(user.host.hostname); user.user_resource.user_conn= 0; @@ -8465,13 +8473,14 @@ static const char *command_array[]= "ALTER", "SHOW DATABASES", "SUPER", "CREATE TEMPORARY TABLES", "LOCK TABLES", "EXECUTE", "REPLICATION SLAVE", "REPLICATION CLIENT", "CREATE VIEW", "SHOW VIEW", "CREATE ROUTINE", "ALTER ROUTINE", - "CREATE USER", "EVENT", "TRIGGER", "CREATE TABLESPACE" + "CREATE USER", "EVENT", "TRIGGER", "CREATE TABLESPACE", + "DELETE VERSIONING ROWS" }; static uint command_lengths[]= { 6, 6, 6, 6, 6, 4, 6, 8, 7, 4, 5, 10, 5, 
5, 14, 5, 23, 11, 7, 17, 18, 11, 9, - 14, 13, 11, 5, 7, 17 + 14, 13, 11, 5, 7, 17, 22, }; diff --git a/sql/sql_acl.h b/sql/sql_acl.h index e3dba20422d..49a7844108e 100644 --- a/sql/sql_acl.h +++ b/sql/sql_acl.h @@ -49,6 +49,7 @@ #define EVENT_ACL (1UL << 26) #define TRIGGER_ACL (1UL << 27) #define CREATE_TABLESPACE_ACL (1UL << 28) +#define DELETE_VERSIONING_ROWS_ACL (1UL << 29) /* don't forget to update 1. static struct show_privileges_st sys_privileges[] @@ -62,12 +63,13 @@ (UPDATE_ACL | SELECT_ACL | INSERT_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_TMP_ACL | \ LOCK_TABLES_ACL | EXECUTE_ACL | CREATE_VIEW_ACL | SHOW_VIEW_ACL | \ - CREATE_PROC_ACL | ALTER_PROC_ACL | EVENT_ACL | TRIGGER_ACL) + CREATE_PROC_ACL | ALTER_PROC_ACL | EVENT_ACL | TRIGGER_ACL | \ + DELETE_VERSIONING_ROWS_ACL) #define TABLE_ACLS \ (SELECT_ACL | INSERT_ACL | UPDATE_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_VIEW_ACL | \ - SHOW_VIEW_ACL | TRIGGER_ACL) + SHOW_VIEW_ACL | TRIGGER_ACL | DELETE_VERSIONING_ROWS_ACL) #define COL_ACLS \ (SELECT_ACL | INSERT_ACL | UPDATE_ACL | REFERENCES_ACL) @@ -85,7 +87,7 @@ CREATE_TMP_ACL | LOCK_TABLES_ACL | REPL_SLAVE_ACL | REPL_CLIENT_ACL | \ EXECUTE_ACL | CREATE_VIEW_ACL | SHOW_VIEW_ACL | CREATE_PROC_ACL | \ ALTER_PROC_ACL | CREATE_USER_ACL | EVENT_ACL | TRIGGER_ACL | \ - CREATE_TABLESPACE_ACL) + CREATE_TABLESPACE_ACL | DELETE_VERSIONING_ROWS_ACL) #define DEFAULT_CREATE_PROC_ACLS \ (ALTER_PROC_ACL | EXECUTE_ACL) @@ -117,31 +119,37 @@ CREATE_PROC_ACL | ALTER_PROC_ACL ) #define DB_CHUNK4 (EXECUTE_ACL) #define DB_CHUNK5 (EVENT_ACL | TRIGGER_ACL) +#define DB_CHUNK6 (DELETE_VERSIONING_ROWS_ACL) #define fix_rights_for_db(A) (((A) & DB_CHUNK0) | \ (((A) << 4) & DB_CHUNK1) | \ (((A) << 6) & DB_CHUNK2) | \ (((A) << 9) & DB_CHUNK3) | \ - (((A) << 2) & DB_CHUNK4))| \ - (((A) << 9) & DB_CHUNK5) + (((A) << 2) & DB_CHUNK4) | \ + (((A) << 9) & DB_CHUNK5) | 
\ + (((A) << 10) & DB_CHUNK6)) #define get_rights_for_db(A) (((A) & DB_CHUNK0) | \ (((A) & DB_CHUNK1) >> 4) | \ (((A) & DB_CHUNK2) >> 6) | \ (((A) & DB_CHUNK3) >> 9) | \ - (((A) & DB_CHUNK4) >> 2))| \ - (((A) & DB_CHUNK5) >> 9) + (((A) & DB_CHUNK4) >> 2) | \ + (((A) & DB_CHUNK5) >> 9) | \ + (((A) & DB_CHUNK6) >> 10)) #define TBL_CHUNK0 DB_CHUNK0 #define TBL_CHUNK1 DB_CHUNK1 #define TBL_CHUNK2 (CREATE_VIEW_ACL | SHOW_VIEW_ACL) #define TBL_CHUNK3 TRIGGER_ACL +#define TBL_CHUNK4 (DELETE_VERSIONING_ROWS_ACL) #define fix_rights_for_table(A) (((A) & TBL_CHUNK0) | \ (((A) << 4) & TBL_CHUNK1) | \ (((A) << 11) & TBL_CHUNK2) | \ - (((A) << 15) & TBL_CHUNK3)) + (((A) << 15) & TBL_CHUNK3) | \ + (((A) << 16) & TBL_CHUNK4)) #define get_rights_for_table(A) (((A) & TBL_CHUNK0) | \ (((A) & TBL_CHUNK1) >> 4) | \ (((A) & TBL_CHUNK2) >> 11) | \ - (((A) & TBL_CHUNK3) >> 15)) + (((A) & TBL_CHUNK3) >> 15) | \ + (((A) & TBL_CHUNK4) >> 16)) #define fix_rights_for_column(A) (((A) & 7) | (((A) & ~7) << 8)) #define get_rights_for_column(A) (((A) & 7) | ((A) >> 8)) #define fix_rights_for_procedure(A) ((((A) << 18) & EXECUTE_ACL) | \ @@ -175,6 +183,7 @@ enum mysql_db_table_field MYSQL_DB_FIELD_EXECUTE_PRIV, MYSQL_DB_FIELD_EVENT_PRIV, MYSQL_DB_FIELD_TRIGGER_PRIV, + MYSQL_DB_FIELD_DELETE_VERSIONING_ROWS_PRIV, MYSQL_DB_FIELD_COUNT }; diff --git a/sql/sql_alter.h b/sql/sql_alter.h index a37d96934ea..39458c45b80 100644 --- a/sql/sql_alter.h +++ b/sql/sql_alter.h @@ -100,6 +100,22 @@ public: enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE }; + bool vers_data_modifying() const + { + return flags & ( + ALTER_ADD_COLUMN | + ALTER_DROP_COLUMN | + ALTER_CHANGE_COLUMN | + ALTER_DROP_PARTITION | + ALTER_COALESCE_PARTITION | + ALTER_REORGANIZE_PARTITION | + ALTER_TABLE_REORG | + ALTER_REMOVE_PARTITIONING | + ALTER_EXCHANGE_PARTITION | + ALTER_TRUNCATE_PARTITION | + ALTER_COLUMN_ORDER); + } + /** The different values of the ALGORITHM clause. Describes which algorithm to use when altering the table. 
diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 7fd6599df51..99971a53a3c 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -5712,6 +5712,7 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, if (field_to_set) { TABLE *table= field_to_set->table; + DBUG_ASSERT(table); if (thd->mark_used_columns == MARK_COLUMNS_READ) bitmap_set_bit(table->read_set, field_to_set->field_index); else @@ -6389,6 +6390,19 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2, bool is_using_column_1; if (!(nj_col_1= it_1.get_or_create_column_ref(thd, leaf_1))) goto err; + + if (nj_col_1->field() && nj_col_1->field()->vers_sys_field()) + continue; + + if (table_ref_1->is_view() && table_ref_1->table->versioned()) + { + Item *item= nj_col_1->view_field->item; + DBUG_ASSERT(item->type() == Item::FIELD_ITEM); + Item_field *item_field= (Item_field *)item; + if (item_field->field->vers_sys_field()) + continue; + } + field_name_1= nj_col_1->name(); is_using_column_1= using_fields && test_if_string_in_list(field_name_1->str, using_fields); @@ -7083,7 +7097,7 @@ bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array, thd->lex->current_select->is_item_list_lookup= 0; /* - To prevent fail on forward lookup we fill it with zerows, + To prevent fail on forward lookup we fill it with zeroes, then if we got pointer on zero after find_item_in_list we will know that it is forward lookup. 
@@ -7483,6 +7497,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, Field_iterator_table_ref field_iterator; bool found; char name_buff[SAFE_NAME_LEN+1]; + ulong vers_hide= thd->variables.vers_hide; DBUG_ENTER("insert_fields"); DBUG_PRINT("arena", ("stmt arena: %p",thd->stmt_arena)); @@ -7586,6 +7601,52 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, if (!(item= field_iterator.create_item(thd))) DBUG_RETURN(TRUE); + if (item->type() == Item::FIELD_ITEM) + { + Item_field *f= static_cast<Item_field *>(item); + DBUG_ASSERT(f->field); + uint32 fl= f->field->flags; + bool sys_field= fl & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG); + SELECT_LEX *slex= thd->lex->current_select; + TABLE *table= f->field->table; + DBUG_ASSERT(table && table->pos_in_table_list); + TABLE_LIST *tl= table->pos_in_table_list; + vers_range_type_t vers_type= tl->vers_conditions.type; + + enum_sql_command sql_command= thd->lex->sql_command; + unsigned int create_options= thd->lex->create_info.options; + + if ( + sql_command == SQLCOM_CREATE_TABLE ? + sys_field && !(create_options & HA_VERSIONED_TABLE) : ( + sys_field ? 
+ (sql_command == SQLCOM_CREATE_VIEW || + slex->nest_level > 0 || + vers_hide == VERS_HIDE_FULL || + ((fl & HIDDEN_FLAG) && ( + vers_hide == VERS_HIDE_IMPLICIT || + (vers_hide == VERS_HIDE_AUTO && ( + vers_type == FOR_SYSTEM_TIME_UNSPECIFIED || + vers_type == FOR_SYSTEM_TIME_AS_OF))))) : + (fl & HIDDEN_FLAG))) + { + continue; + } + } + else if (item->type() == Item::REF_ITEM) + { + Item *i= item; + while (i->type() == Item::REF_ITEM) + i= *((Item_ref *)i)->ref; + if (i->type() == Item::FIELD_ITEM) + { + Item_field *f= (Item_field *)i; + DBUG_ASSERT(f->field); + if (f->field->flags & HIDDEN_FLAG) + continue; + } + } + /* cache the table for the Item_fields inserted by expanding stars */ if (item->type() == Item::FIELD_ITEM && tables->cacheable_table) ((Item_field *)item)->cached_table= tables; @@ -7980,6 +8041,13 @@ fill_record(THD *thd, TABLE *table_arg, List<Item> &fields, List<Item> &values, ER_THD(thd, ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN), rfield->field_name.str, table->s->table_name.str); } + if (table->versioned() && rfield->vers_sys_field() && + !ignore_errors) + { + my_error(ER_VERS_READONLY_FIELD, MYF(0), rfield->field_name.str); + goto err; + } + if (rfield->stored_in_db() && (value->save_in_field(rfield, 0)) < 0 && !ignore_errors) { @@ -8020,7 +8088,7 @@ void switch_to_nullable_trigger_fields(List<Item> &items, TABLE *table) Field** field= table->field_to_fill(); /* True if we have NOT NULL fields and BEFORE triggers */ - if (field != table->field) + if (field != table->field && field != table->non_generated_field) { List_iterator_fast<Item> it(items); Item *item; @@ -8227,6 +8295,13 @@ fill_record(THD *thd, TABLE *table, Field **ptr, List<Item> &values, } } + if (table->versioned() && field->vers_sys_field() && + !ignore_errors) + { + my_error(ER_VERS_READONLY_FIELD, MYF(0), field->field_name.str); + goto err; + } + if (use_value) value->save_val(field); else @@ -8478,7 +8553,6 @@ int init_ftfuncs(THD *thd, SELECT_LEX *select_lex, bool 
no_order) { List_iterator<Item_func_match> li(*(select_lex->ftfunc_list)); Item_func_match *ifm; - DBUG_PRINT("info",("Performing FULLTEXT search")); while ((ifm=li++)) ifm->init_search(thd, no_order); @@ -8667,10 +8741,31 @@ open_log_table(THD *thd, TABLE_LIST *one_table, Open_tables_backup *backup) if ((table= open_ltable(thd, one_table, one_table->lock_type, flags))) { - DBUG_ASSERT(table->s->table_category == TABLE_CATEGORY_LOG); - /* Make sure all columns get assigned to a default value */ - table->use_all_columns(); - DBUG_ASSERT(table->s->no_replicate); + if (table->s->table_category == TABLE_CATEGORY_LOG) + { + /* Make sure all columns get assigned to a default value */ + table->use_all_columns(); + DBUG_ASSERT(table->s->no_replicate); + } + else + { + my_error(ER_NOT_LOG_TABLE, MYF(0), table->s->db.str, table->s->table_name.str); + int error= 0; + if (table->current_lock != F_UNLCK) + { + table->current_lock= F_UNLCK; + error= table->file->ha_external_lock(thd, F_UNLCK); + } + if (error) + table->file->print_error(error, MYF(0)); + else + { + tc_release_table(table); + thd->reset_open_tables_state(thd); + thd->restore_backup_open_tables_state(backup); + table= NULL; + } + } } else thd->restore_backup_open_tables_state(backup); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 889acc57b76..65d9afcf509 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -710,6 +710,11 @@ extern "C" void thd_kill_timeout(THD* thd) mysql_mutex_unlock(&thd->LOCK_thd_data); } +Time_zone * thd_get_timezone(THD * thd) +{ + DBUG_ASSERT(thd && thd->variables.time_zone); + return thd->variables.time_zone; +} THD::THD(my_thread_id id, bool is_wsrep_applier) :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, @@ -3700,6 +3705,7 @@ void Query_arena::set_query_arena(Query_arena *set) mem_root= set->mem_root; free_list= set->free_list; state= set->state; + is_stored_procedure= set->is_stored_procedure; } @@ -4772,12 +4778,18 @@ extern "C" int 
thd_rpl_is_parallel(const MYSQL_THD thd) return thd->rgi_slave && thd->rgi_slave->is_parallel_exec; } +/* Returns the timestamp (as time_t) for the start + of the current query. */ +extern "C" time_t thd_start_time(const MYSQL_THD thd) +{ + return thd->start_time; +} /* Returns high resolution timestamp for the start of the current query. */ extern "C" unsigned long long thd_start_utime(const MYSQL_THD thd) { - return thd->start_utime; + return thd->start_time * 1000000 + thd->start_time_sec_part; } @@ -7051,6 +7063,15 @@ static bool protect_against_unsafe_warning_flood(int unsafe_type) DBUG_RETURN(unsafe_warning_suppression_active[unsafe_type]); } +MYSQL_TIME THD::query_start_TIME() +{ + MYSQL_TIME res; + variables.time_zone->gmt_sec_to_TIME(&res, query_start()); + res.second_part= query_start_sec_part(); + time_zone_used= 1; + return res; +} + /** Auxiliary method used by @c binlog_query() to raise warnings. @@ -7670,3 +7691,16 @@ void Database_qualified_name::copy(MEM_ROOT *mem_root, #endif /* !defined(MYSQL_CLIENT) */ + + +Query_arena_stmt::Query_arena_stmt(THD *_thd) : + thd(_thd) +{ + arena= thd->activate_stmt_arena_if_needed(&backup); +} + +Query_arena_stmt::~Query_arena_stmt() +{ + if (arena) + thd->restore_active_arena(arena, &backup); +} diff --git a/sql/sql_class.h b/sql/sql_class.h index fda56c8cc3a..1ef3ca21471 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -702,6 +702,12 @@ typedef struct system_variables uint idle_write_transaction_timeout; uint column_compression_threshold; uint column_compression_zlib_level; + + st_vers_current_time vers_current_time; + my_bool vers_force; + ulong vers_hide; + my_bool vers_innodb_algorithm_simple; + ulong vers_alter_history; } SV; /** @@ -938,6 +944,11 @@ public: enum_state state; +protected: + friend class sp_head; + bool is_stored_procedure; + +public: /* We build without RTTI, so dynamic_cast can't be used. 
*/ enum Type { @@ -945,7 +956,8 @@ public: }; Query_arena(MEM_ROOT *mem_root_arg, enum enum_state state_arg) : - free_list(0), mem_root(mem_root_arg), state(state_arg) + free_list(0), mem_root(mem_root_arg), state(state_arg), + is_stored_procedure(state_arg == STMT_INITIALIZED_FOR_SP ? true : false) { INIT_ARENA_DBUG_INFO; } /* This constructor is used only when Query_arena is created as @@ -965,6 +977,8 @@ public: { return state == STMT_PREPARED || state == STMT_EXECUTED; } inline bool is_conventional() const { return state == STMT_CONVENTIONAL_EXECUTION; } + inline bool is_sp_execute() const + { return is_stored_procedure; } inline void* alloc(size_t size) { return alloc_root(mem_root,size); } inline void* calloc(size_t size) @@ -1009,6 +1023,22 @@ public: }; +class Query_arena_stmt +{ + THD *thd; + Query_arena backup; + Query_arena *arena; + +public: + Query_arena_stmt(THD *_thd); + ~Query_arena_stmt(); + bool arena_replaced() + { + return arena != NULL; + } +}; + + class Server_side_cursor; /** @@ -3287,6 +3317,7 @@ public: inline my_time_t query_start() { query_start_used=1; return start_time; } inline ulong query_start_sec_part() { query_start_sec_part_used=1; return start_time_sec_part; } + MYSQL_TIME query_start_TIME(); inline void set_current_time() { my_hrtime_t hrtime= my_hrtime(); @@ -4083,7 +4114,12 @@ public: Lex_input_stream *lip= &m_parser_state->m_lip; if (!yytext) { - if (!(yytext= lip->get_tok_start())) + if (lip->lookahead_token >= 0) + yytext= lip->get_tok_start_prev(); + else + yytext= lip->get_tok_start(); + + if (!yytext) yytext= ""; } /* Push an error into the error stack */ @@ -5712,6 +5748,12 @@ class multi_update :public select_result_interceptor /* Need this to protect against multiple prepare() calls */ bool prepared; + + // For System Versioning (may need to insert new fields to a table). 
+ ha_rows updated_sys_ver; + + bool has_vers_fields; + public: multi_update(THD *thd_arg, TABLE_LIST *ut, List<TABLE_LIST> *leaves_list, List<Item> *fields, List<Item> *values, @@ -6229,6 +6271,24 @@ inline bool lex_string_eq(const LEX_CSTRING *a, return strcasecmp(a->str, b->str) != 0; } -#endif /* MYSQL_SERVER */ +class ScopedStatementReplication +{ +public: + ScopedStatementReplication(THD *thd) : thd(thd) + { + if (thd) + saved_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + } + ~ScopedStatementReplication() + { + if (thd) + thd->restore_stmt_binlog_format(saved_binlog_format); + } +private: + enum_binlog_format saved_binlog_format; + THD *thd; +}; + +#endif /* MYSQL_SERVER */ #endif /* SQL_CLASS_INCLUDED */ diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index c880374f9ab..39502a6637c 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -217,6 +217,13 @@ void Update_plan::save_explain_data_intern(MEM_ROOT *mem_root, static bool record_should_be_deleted(THD *thd, TABLE *table, SQL_SELECT *sel, Explain_delete *explain) { + if (table->versioned()) + { + bool row_is_alive= table->vers_end_field()->is_max(); + if (table->pos_in_table_list->vers_conditions ? row_is_alive : !row_is_alive) + return false; + } + explain->tracker.on_record_read(); thd->inc_examined_row_count(1); if (table->vfield) @@ -230,6 +237,18 @@ static bool record_should_be_deleted(THD *thd, TABLE *table, SQL_SELECT *sel, } +inline +int TABLE::delete_row() +{ + if (!versioned_by_sql() || !vers_end_field()->is_max()) + return file->ha_delete_row(record[0]); + + store_record(this, record[1]); + vers_end_field()->set_time(); + return file->ha_update_row(record[1], record[0]); +} + + /** Implement DELETE SQL word. 
@@ -273,6 +292,23 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (open_and_lock_tables(thd, table_list, TRUE, 0)) DBUG_RETURN(TRUE); + bool truncate_history= table_list->vers_conditions; + if (truncate_history) + { + TABLE *table= table_list->table; + DBUG_ASSERT(table); + + DBUG_ASSERT(!conds); + if (vers_setup_select(thd, table_list, &conds, select_lex)) + DBUG_RETURN(TRUE); + + // trx_sees() in InnoDB reads sys_trx_start + if (!table->versioned_by_sql()) { + DBUG_ASSERT(table_list->vers_conditions.type == FOR_SYSTEM_TIME_BEFORE); + bitmap_set_bit(table->read_set, table->vers_end_field()->field_index); + } + } + if (mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT)) DBUG_RETURN(TRUE); if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE)) @@ -365,7 +401,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, */ if (!with_select && !using_limit && const_cond_result && (!thd->is_current_stmt_binlog_format_row() && - !(table->triggers && table->triggers->has_delete_triggers()))) + !(table->triggers && table->triggers->has_delete_triggers())) + && !table->versioned_by_sql()) { /* Update the table->file->stats.records number */ table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); @@ -620,7 +657,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, delete_record= record_should_be_deleted(thd, table, select, explain); if (delete_record) { - if (table->triggers && + if (!truncate_history && table->triggers && table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_BEFORE, FALSE)) { @@ -634,10 +671,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, break; } - if (!(error= table->file->ha_delete_row(table->record[0]))) + error= table->delete_row(); + if (!error) { deleted++; - if (table->triggers && + if (!truncate_history && table->triggers && table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_AFTER, FALSE)) { @@ -723,6 +761,8 @@ 
cleanup: else errcode= query_error_code(thd, killed_status == NOT_KILLED); + ScopedStatementReplication scoped_stmt_rpl( + table->versioned_by_engine() ? thd : NULL); /* [binlog]: If 'handler::delete_all_rows()' was called and the storage engine does not inject the rows itself, we replicate @@ -1104,6 +1144,11 @@ int multi_delete::send_data(List<Item> &values) if (table->status & (STATUS_NULL_ROW | STATUS_DELETED)) continue; + if (table->versioned() && !table->vers_end_field()->is_max()) + { + continue; + } + table->file->position(table->record[0]); found++; @@ -1116,7 +1161,9 @@ int multi_delete::send_data(List<Item> &values) TRG_ACTION_BEFORE, FALSE)) DBUG_RETURN(1); table->status|= STATUS_DELETED; - if (!(error=table->file->ha_delete_row(table->record[0]))) + + error= table->delete_row(); + if (!error) { deleted++; if (!table->file->has_transactions()) @@ -1295,8 +1342,8 @@ int multi_delete::do_table_deletes(TABLE *table, SORT_INFO *sort_info, local_error= 1; break; } - - local_error= table->file->ha_delete_row(table->record[0]); + + local_error= table->delete_row(); if (local_error && !ignore) { table->file->print_error(local_error, MYF(0)); diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 2df3af03af5..c8aa8702329 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -732,6 +732,131 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived) cursor= cursor->next_local) cursor->outer_join|= JOIN_TYPE_OUTER; } + + // System Versioning: fix system fields of versioned derived table +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat" +#pragma GCC diagnostic ignored "-Wformat-extra-args" + if ((thd->stmt_arena->is_stmt_prepare() || !thd->stmt_arena->is_stmt_execute()) + && sl->table_list.elements > 0) + { + // Similar logic as in mysql_create_view() + // Leading versioning table detected implicitly (first one selected) + TABLE_LIST *impli_table= NULL; + // Leading versioning table specified explicitly + // (i.e. 
if at least one system field is selected) + TABLE_LIST *expli_table= NULL; + const LString_i *impli_start, *impli_end; + Item_field *expli_start= NULL, *expli_end= NULL; + + for (TABLE_LIST *table= sl->table_list.first; table; table= table->next_local) + { + if (!table->table || !table->table->versioned()) + continue; + + const LString_i table_start= table->table->vers_start_field()->field_name; + const LString_i table_end= table->table->vers_end_field()->field_name; + if (!impli_table) + { + impli_table= table; + impli_start= &table_start; + impli_end= &table_end; + } + + /* Implicitly add versioning fields if needed */ + Item *item; + List_iterator_fast<Item> it(sl->item_list); + + DBUG_ASSERT(table->alias); + while ((item= it++)) + { + if (item->real_item()->type() != Item::FIELD_ITEM) + continue; + Item_field *fld= (Item_field*) (item->real_item()); + if (fld->table_name && 0 != my_strcasecmp(table_alias_charset, table->alias, fld->table_name)) + continue; + DBUG_ASSERT(fld->field_name.str); + if (table_start == fld->field_name) + { + if (expli_start) + { + my_printf_error( + ER_VERS_DERIVED_PROHIBITED, + "Derived table is prohibited: multiple start system fields `%s.%s`, `%s.%s` in query!", MYF(0), + expli_table->alias, + expli_start->field_name.str, + table->alias, + fld->field_name.str); + res= true; + goto exit; + } + if (expli_table) + { + if (expli_table != table) + { +expli_table_err: + my_printf_error( + ER_VERS_DERIVED_PROHIBITED, + "Derived table is prohibited: system fields from multiple tables %`s, %`s in query!", MYF(0), + expli_table->alias, + table->alias); + res= true; + goto exit; + } + } + else + expli_table= table; + expli_start= fld; + impli_end= &table_end; + } + else if (table_end == fld->field_name) + { + if (expli_end) + { + my_printf_error( + ER_VERS_DERIVED_PROHIBITED, + "Derived table is prohibited: multiple end system fields `%s.%s`, `%s.%s` in query!", MYF(0), + expli_table->alias, + expli_end->field_name.str, + table->alias, + 
fld->field_name.str); + res= true; + goto exit; + } + if (expli_table) + { + if (expli_table != table) + goto expli_table_err; + } + else + expli_table= table; + expli_end= fld; + impli_start= &table_start; + } + } // while ((item= it++)) + } // for (TABLE_LIST *table) + + if (expli_table) + impli_table= expli_table; + + if (impli_table) + { + Query_arena_stmt on_stmt_arena(thd); + if (!expli_start && (res= sl->vers_push_field(thd, impli_table, *impli_start))) + goto exit; + if (!expli_end && (res= sl->vers_push_field(thd, impli_table, *impli_end))) + goto exit; + + if (impli_table->vers_conditions) + { + sl->vers_export_outer= impli_table->vers_conditions; + } + else + sl->vers_import_outer= true; // FIXME: is needed? + } + } // if (sl->table_list.elements > 0) +#pragma GCC diagnostic pop + // System Versioning end } unit->derived= derived; diff --git a/sql/sql_error.h b/sql/sql_error.h index f8b8adc805a..b89af72cc44 100644 --- a/sql/sql_error.h +++ b/sql/sql_error.h @@ -1174,7 +1174,7 @@ public: void copy_non_errors_from_wi(THD *thd, const Warning_info *src_wi); -private: +protected: Warning_info *get_warning_info() { return m_wi_stack.front(); } const Warning_info *get_warning_info() const { return m_wi_stack.front(); } diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index b12b470209c..9debb07aa9f 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -222,7 +222,7 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list, table_list->view_db.str, table_list->view_name.str); DBUG_RETURN(-1); } - if (values.elements != table->s->fields) + if (values.elements != table->vers_user_fields()) { my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), 1L); DBUG_RETURN(-1); @@ -1033,6 +1033,9 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, } } + if (table->versioned_by_sql()) + table->vers_update_fields(); + if ((res= table_list->view_check_option(thd, (values_list.elements == 1 ? 
0 : @@ -1138,8 +1141,10 @@ values_loop_end: } else errcode= query_error_code(thd, thd->killed == NOT_KILLED); - - /* bug#22725: + + ScopedStatementReplication scoped_stmt_rpl( + table->versioned_by_engine() ? thd : NULL); + /* bug#22725: A query which per-row-loop can not be interrupted with KILLED, like INSERT, and that does not invoke stored @@ -1559,6 +1564,13 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, if (!table) table= table_list->table; + if (table->versioned_by_sql() && duplic == DUP_REPLACE) + { + // Additional memory may be required to create historical items. + if (table_list->set_insert_values(thd->mem_root)) + DBUG_RETURN(TRUE); + } + if (!select_insert) { Item *fake_conds= 0; @@ -1610,6 +1622,25 @@ static int last_uniq_key(TABLE *table,uint keynr) /* + Inserts one historical row to a table. + + Copies content of the row from table->record[1] to table->record[0], + sets Sys_end to now() and calls ha_write_row() . +*/ + +int vers_insert_history_row(TABLE *table) +{ + DBUG_ASSERT(table->versioned_by_sql()); + restore_record(table,record[1]); + + // Set Sys_end to now() + if (table->vers_end_field()->set_time()) + DBUG_ASSERT(0); + + return table->file->ha_write_row(table->record[0]); +} + +/* Write a record to table with optional deleting of conflicting records, invoke proper triggers if needed. @@ -1813,7 +1844,23 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) } if (error != HA_ERR_RECORD_IS_THE_SAME) + { info->updated++; + if (table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if ((error= vers_insert_history_row(table))) + { + restore_record(table, record[2]); + goto err; + } + restore_record(table, record[2]); + } + info->copied++; + } + } else error= 0; /* @@ -1865,13 +1912,16 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) tables which have ON UPDATE but have no ON DELETE triggers, we just should not expose this fact to users by invoking ON UPDATE triggers. 
- */ - if (last_uniq_key(table,key_nr) && - !table->file->referenced_by_foreign_key() && - (!table->triggers || !table->triggers->has_delete_triggers())) + For system versioning we also use path through delete since we would + save nothing through this cheating. + */ + if (last_uniq_key(table,key_nr) && + !table->file->referenced_by_foreign_key() && + (!table->triggers || !table->triggers->has_delete_triggers()) && + !table->versioned_by_sql()) { if ((error=table->file->ha_update_row(table->record[1], - table->record[0])) && + table->record[0])) && error != HA_ERR_RECORD_IS_THE_SAME) goto err; if (error != HA_ERR_RECORD_IS_THE_SAME) @@ -1891,9 +1941,29 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_BEFORE, TRUE)) goto before_trg_err; - if ((error=table->file->ha_delete_row(table->record[1]))) + + if (!table->versioned_by_sql()) + error= table->file->ha_delete_row(table->record[1]); + else + { + DBUG_ASSERT(table->insert_values); + store_record(table,insert_values); + restore_record(table,record[1]); + if (table->vers_end_field()->set_time()) + { + error= 1; + goto err; + } + error= table->file->ha_update_row(table->record[1], + table->record[0]); + restore_record(table,insert_values); + } + if (error) goto err; - info->deleted++; + if (!table->versioned_by_sql()) + info->deleted++; + else + info->updated++; if (!table->file->has_transactions()) thd->transaction.stmt.modified_non_trans_table= TRUE; if (table->triggers && @@ -1981,7 +2051,9 @@ int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, TABLE_LIST *t for (Field **field=entry->field ; *field ; field++) { if (!bitmap_is_set(write_set, (*field)->field_index) && - has_no_default_value(thd, *field, table_list)) + !(*field)->vers_sys_field() && + has_no_default_value(thd, *field, table_list) && + ((*field)->real_type() != MYSQL_TYPE_ENUM)) err=1; } return thd->abort_on_warning ? 
err : 0; @@ -3737,6 +3809,8 @@ int select_insert::send_data(List<Item> &values) DBUG_RETURN(0); thd->count_cuted_fields= CHECK_FIELD_WARN; // Calculate cuted fields + if (table->versioned_by_sql()) + table->vers_update_fields(); store_values(values); if (table->default_field && table->update_default_fields(0, info.ignore)) DBUG_RETURN(1); @@ -3795,12 +3869,16 @@ int select_insert::send_data(List<Item> &values) void select_insert::store_values(List<Item> &values) { + DBUG_ENTER("select_insert::store_values"); + if (fields->elements) fill_record_n_invoke_before_triggers(thd, table, *fields, values, 1, TRG_EVENT_INSERT); else fill_record_n_invoke_before_triggers(thd, table, table->field_to_fill(), values, 1, TRG_EVENT_INSERT); + + DBUG_VOID_RETURN; } bool select_insert::prepare_eof() @@ -4090,6 +4168,12 @@ static TABLE *create_table_from_items(THD *thd, alter_info->create_list.push_back(cr_field, thd->mem_root); } + if (create_info->vers_info.check_and_fix_implicit( + thd, alter_info, create_info, create_table->table_name)) + { + DBUG_RETURN(NULL); + } + DEBUG_SYNC(thd,"create_table_select_before_create"); /* Check if LOCK TABLES + CREATE OR REPLACE of existing normal table*/ diff --git a/sql/sql_insert.h b/sql/sql_insert.h index aea0dac6b0d..6efd680d188 100644 --- a/sql/sql_insert.h +++ b/sql/sql_insert.h @@ -37,6 +37,7 @@ void upgrade_lock_type_for_insert(THD *thd, thr_lock_type *lock_type, bool is_multi_insert); int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, TABLE_LIST *table_list); +int vers_insert_history_row(TABLE *table); int write_record(THD *thd, TABLE *table, COPY_INFO *info); void kill_delayed_threads(void); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index ce43a45b872..e4b0877e2d7 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -33,9 +33,9 @@ #include "sql_signal.h" -void LEX::parse_error() +void LEX::parse_error(uint err_number) { - thd->parse_error(); + thd->parse_error(err_number); } @@ -771,6 +771,8 @@ void LEX::start(THD 
*thd_arg) frame_bottom_bound= NULL; win_spec= NULL; + vers_conditions.empty(); + is_lex_started= TRUE; DBUG_VOID_RETURN; } @@ -1339,6 +1341,8 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) return WITH_CUBE_SYM; case ROLLUP_SYM: return WITH_ROLLUP_SYM; + case SYSTEM: + return WITH_SYSTEM_SYM; default: /* Save the token following 'WITH' @@ -1349,6 +1353,27 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) return WITH; } break; + case FOR_SYM: + /* + * Additional look-ahead to resolve doubtful cases like: + * SELECT ... FOR UPDATE + * SELECT ... FOR SYSTEM_TIME ... . + */ + token= lex_one_token(yylval, thd); + lip->add_digest_token(token, yylval); + switch(token) { + case SYSTEM_TIME_SYM: + return FOR_SYSTEM_TIME_SYM; + default: + /* + Save the token following 'FOR_SYM' + */ + lip->lookahead_yylval= lip->yylval; + lip->yylval= NULL; + lip->lookahead_token= token; + return FOR_SYM; + } + break; default: break; } @@ -2185,6 +2210,7 @@ void st_select_lex::init_query() join= 0; having= prep_having= where= prep_where= 0; cond_pushed_into_where= cond_pushed_into_having= 0; + saved_where= 0; olap= UNSPECIFIED_OLAP_TYPE; having_fix_field= 0; context.select_lex= this; @@ -2261,6 +2287,8 @@ void st_select_lex::init_select() with_dep= 0; join= 0; lock_type= TL_READ_DEFAULT; + vers_import_outer= false; + vers_export_outer.empty(); } /* @@ -2997,8 +3025,7 @@ void Query_tables_list::destroy_query_tables_list() */ LEX::LEX() - : explain(NULL), - result(0), arena_for_set_stmt(0), mem_root_for_set_stmt(0), + : explain(NULL), result(0), arena_for_set_stmt(0), mem_root_for_set_stmt(0), option_type(OPT_DEFAULT), context_analysis_only(0), sphead(0), is_lex_started(0), limit_rows_examined_cnt(ULONGLONG_MAX) { @@ -7227,6 +7254,19 @@ int set_statement_var_if_exists(THD *thd, const char *var_name, } +Query_tables_backup::Query_tables_backup(THD* _thd) : + thd(_thd) +{ + thd->lex->reset_n_backup_query_tables_list(&backup); +} + + +Query_tables_backup::~Query_tables_backup() +{ + 
thd->lex->restore_backup_query_tables_list(&backup); +} + + bool LEX::sp_add_cfetch(THD *thd, const LEX_CSTRING *name) { uint offset; @@ -7357,3 +7397,25 @@ Item *LEX::make_item_func_replace(THD *thd, new (thd->mem_root) Item_func_replace_oracle(thd, org, find, replace) : new (thd->mem_root) Item_func_replace(thd, org, find, replace); } + + +bool SELECT_LEX::vers_push_field(THD *thd, TABLE_LIST *table, const LEX_CSTRING field_name) +{ + Item_field *fld= new (thd->mem_root) Item_field(thd, &context, + table->db, table->alias, &field_name); + if (!fld) + return true; + + item_list.push_back(fld); + + if (thd->lex->view_list.elements) + { + if (LEX_STRING *l= thd->make_lex_string(field_name.str, field_name.length)) + thd->lex->view_list.push_back(l); + else + return true; + } + + return false; +} + diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 7619353d922..48afd2246bf 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -831,6 +831,7 @@ public: Item *prep_having;/* saved HAVING clause for prepared statement processing */ Item *cond_pushed_into_where; /* condition pushed into the select's WHERE */ Item *cond_pushed_into_having; /* condition pushed into the select's HAVING */ + Item *saved_where; /* Saved values of the WHERE and HAVING clauses*/ Item::cond_result cond_value, having_value; /* point on lex in which it was created, used in view subquery detection */ @@ -1018,6 +1019,12 @@ public: /* it is for correct printing SELECT options */ thr_lock_type lock_type; + /* System Versioning */ + vers_select_conds_t vers_export_outer; + bool vers_import_outer; + /* push new Item_field into item_list */ + bool vers_push_field(THD *thd, TABLE_LIST *table, const LEX_CSTRING field_name); + void init_query(); void init_select(); st_select_lex_unit* master_unit() { return (st_select_lex_unit*) master; } @@ -1946,6 +1953,18 @@ private: }; +class Query_tables_backup +{ + THD *thd; + Query_tables_list backup; + +public: + Query_tables_backup(THD *_thd); + ~Query_tables_backup(); + 
const Query_tables_list& get() const { return backup; } +}; + + /* st_parsing_options contains the flags for constructions that are allowed in the current statement. @@ -2682,7 +2701,6 @@ struct LEX: public Query_tables_list private: Query_arena_memroot *arena_for_set_stmt; MEM_ROOT *mem_root_for_set_stmt; - void parse_error(); bool sp_block_finalize(THD *thd, const Lex_spblock_st spblock, class sp_label **splabel); bool sp_change_context(THD *thd, const sp_pcontext *ctx, bool exclusive); @@ -2696,6 +2714,7 @@ private: bool sp_for_loop_increment(THD *thd, const Lex_for_loop_st &loop); public: + void parse_error(uint err_number= ER_SYNTAX_ERROR); inline bool is_arena_for_set_stmt() {return arena_for_set_stmt != 0;} bool set_arena_for_set_stmt(Query_arena *backup); void reset_arena_for_set_stmt(Query_arena *backup); @@ -2946,6 +2965,9 @@ public: Window_frame_bound *frame_bottom_bound; Window_spec *win_spec; + /* System Versioning */ + vers_select_conds_t vers_conditions; + inline void free_set_stmt_mem_root() { DBUG_ASSERT(!is_arena_for_set_stmt()); @@ -3647,6 +3669,11 @@ public: bool add_grant_command(THD *thd, enum_sql_command sql_command_arg, stored_procedure_type type_arg); + + Vers_parse_info &vers_get_info() + { + return create_info.vers_info; + } }; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 2f886d91780..20c0ea7a528 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -112,6 +112,7 @@ #include "wsrep_mysqld.h" #include "wsrep_thd.h" +#include "vtmd.h" static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, Parser_state *parser_state, @@ -3165,6 +3166,11 @@ bool Sql_cmd_call::execute(THD *thd) if (do_execute_sp(thd, sp)) return true; + if (sp->sp_cache_version() == ULONG_MAX) + { + sp_cache_flush(thd->sp_proc_cache, sp); + } + /* Disable slow log for the above call(), if calls are disabled. Instead we will log the executed statements to the slow log. @@ -4015,7 +4021,6 @@ mysql_execute_command(THD *thd) copy. 
*/ Alter_info alter_info(lex->alter_info, thd->mem_root); - if (thd->is_fatal_error) { /* If out of memory when creating a copy of alter_info. */ @@ -4043,6 +4048,7 @@ mysql_execute_command(THD *thd) */ if (!(create_info.used_fields & HA_CREATE_USED_ENGINE)) create_info.use_default_db_type(thd); + /* If we are using SET CHARSET without DEFAULT, add an implicit DEFAULT to not confuse old users. (This may change). @@ -4229,6 +4235,11 @@ mysql_execute_command(THD *thd) } else { + if (create_info.vers_info.check_and_fix_implicit( + thd, &alter_info, &create_info, create_table->table_name)) + { + goto end_with_restore_list; + } /* In STATEMENT format, we probably have to replicate also temporary tables, like mysql replication does. Also check if the requested @@ -6386,6 +6397,21 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) if (check_dependencies_in_with_clauses(lex->with_clauses_list)) return 1; + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_local) + { + if (table->vers_conditions) + { + VTMD_exists vtmd(*table); + if (vtmd.check_exists(thd)) + return 1; + if (vtmd.exists && vtmd.setup_select(thd)) + return 1; + } + } + } + if (!(res= open_and_lock_tables(thd, all_tables, TRUE, 0))) { if (lex->describe) diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index fadd7009822..8784e1f1f62 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -67,6 +67,7 @@ #include "opt_range.h" // store_key_image_to_rec #include "sql_alter.h" // Alter_table_ctx #include "sql_select.h" +#include "sql_tablespace.h" // check_tablespace_name #include <algorithm> using std::max; @@ -87,6 +88,7 @@ static int get_partition_id_list_col(partition_info *, uint32 *, longlong *); static int get_partition_id_list(partition_info *, uint32 *, longlong *); static int get_partition_id_range_col(partition_info *, uint32 *, longlong *); static int 
get_partition_id_range(partition_info *, uint32 *, longlong *); +static int vers_get_partition_id(partition_info *, uint32 *, longlong *); static int get_part_id_charset_func_part(partition_info *, uint32 *, longlong *); static int get_part_id_charset_func_subpart(partition_info *, uint32 *); static int get_partition_id_hash_nosub(partition_info *, uint32 *, longlong *); @@ -1295,6 +1297,24 @@ static void set_up_partition_func_pointers(partition_info *part_info) part_info->get_subpartition_id= get_partition_id_hash_sub; } } + else if (part_info->part_type == VERSIONING_PARTITION) + { + part_info->get_part_partition_id= vers_get_partition_id; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + else + part_info->get_subpartition_id= get_partition_id_key_sub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + else + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } else /* LIST Partitioning */ { if (part_info->column_list) @@ -1335,6 +1355,10 @@ static void set_up_partition_func_pointers(partition_info *part_info) else part_info->get_partition_id= get_partition_id_list; } + else if (part_info->part_type == VERSIONING_PARTITION) + { + part_info->get_partition_id= vers_get_partition_id; + } else /* HASH partitioning */ { if (part_info->list_of_part_fields) @@ -1607,6 +1631,7 @@ bool fix_partition_func(THD *thd, TABLE *table, } } DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION); + DBUG_ASSERT(part_info->part_type != VERSIONING_PARTITION || part_info->column_list); /* Partition is defined. We need to verify that partitioning function is correct. 
@@ -1639,6 +1664,9 @@ bool fix_partition_func(THD *thd, TABLE *table, const char *error_str; if (part_info->column_list) { + if (part_info->part_type == VERSIONING_PARTITION && + part_info->vers_setup_expression(thd)) + goto end; List_iterator<const char> it(part_info->part_field_list); if (unlikely(handle_list_of_fields(thd, it, table, part_info, FALSE))) goto end; @@ -1662,6 +1690,12 @@ bool fix_partition_func(THD *thd, TABLE *table, if (unlikely(part_info->check_list_constants(thd))) goto end; } + else if (part_info->part_type == VERSIONING_PARTITION) + { + error_str= "SYSTEM_TIME"; + if (unlikely(part_info->check_range_constants(thd))) + goto end; + } else { DBUG_ASSERT(0); @@ -2182,6 +2216,20 @@ static int add_partition_values(String *str, partition_info *part_info, } while (++i < num_items); err+= str->append(')'); } + else if (part_info->part_type == VERSIONING_PARTITION) + { + switch (p_elem->type()) + { + case partition_element::AS_OF_NOW: + err+= str->append(STRING_WITH_LEN(" AS OF NOW")); + break; + case partition_element::VERSIONING: + err+= str->append(STRING_WITH_LEN(" VERSIONING")); + break; + default: + DBUG_ASSERT(0 && "wrong p_elem->type"); + } + } end: return err; } @@ -2275,13 +2323,32 @@ char *generate_partition_syntax(THD *thd, partition_info *part_info, else err+= str.append(STRING_WITH_LEN("HASH ")); break; + case VERSIONING_PARTITION: + err+= str.append(STRING_WITH_LEN("SYSTEM_TIME")); + break; default: DBUG_ASSERT(0); /* We really shouldn't get here, no use in continuing from here */ my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR)); DBUG_RETURN(NULL); } - if (part_info->part_expr) + if (part_info->part_type == VERSIONING_PARTITION) + { + Vers_part_info *vers_info= part_info->vers_info; + DBUG_ASSERT(vers_info); + if (vers_info->interval) + { + err+= str.append(STRING_WITH_LEN("INTERVAL ")); + err+= str.append_ulonglong(vers_info->interval); + err+= str.append(STRING_WITH_LEN(" SECOND ")); + } + if (vers_info->limit) + { + err+= 
str.append(STRING_WITH_LEN("LIMIT ")); + err+= str.append_ulonglong(vers_info->limit); + } + } + else if (part_info->part_expr) { err+= str.append('('); part_info->part_expr->print_for_table_def(&str); @@ -3088,6 +3155,83 @@ int get_partition_id_range_col(partition_info *part_info, } +int vers_get_partition_id(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + DBUG_ENTER("vers_get_partition_id"); + DBUG_ASSERT(part_info); + Field *sys_trx_end= part_info->part_field_array[STAT_TRX_END]; + DBUG_ASSERT(sys_trx_end); + TABLE *table= part_info->table; + DBUG_ASSERT(table); + Vers_part_info *vers_info= part_info->vers_info; + DBUG_ASSERT(vers_info); + DBUG_ASSERT(vers_info->initialized()); + DBUG_ASSERT(sys_trx_end->table == table); + bool tmp_off= false; + if (!table->versioned() && table->file->native_versioned()) + { + // in copy_data_between_tables() versioning may be temporarily turned off + tmp_off= true; + table->s->versioned= true; + } + DBUG_ASSERT(table->versioned()); + DBUG_ASSERT(table->vers_end_field() == sys_trx_end); + + // new rows have NULL in sys_trx_end + if (sys_trx_end->is_max() || sys_trx_end->is_null()) + { + *part_id= vers_info->now_part->id; + } + else // row is historical + { + THD *thd= current_thd; + + switch (thd->lex->sql_command) + { + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_ALTER_TABLE: + mysql_mutex_lock(&table->s->LOCK_rotation); + if (table->s->busy_rotation) + { + table->s->vers_wait_rotation(); + part_info->vers_hist_part(); + } + else + { + table->s->busy_rotation= true; + mysql_mutex_unlock(&table->s->LOCK_rotation); + // transaction is not yet pushed to VTQ, so we use now-time + my_time_t end_ts= sys_trx_end->table->versioned_by_engine() ? 
+ my_time(0) : sys_trx_end->get_timestamp(); + if (part_info->vers_limit_exceed() || part_info->vers_interval_exceed(end_ts)) + { + part_info->vers_part_rotate(thd); + } + mysql_mutex_lock(&table->s->LOCK_rotation); + mysql_cond_broadcast(&table->s->COND_rotation); + table->s->busy_rotation= false; + } + mysql_mutex_unlock(&table->s->LOCK_rotation); + break; + default: + ; + } + *part_id= vers_info->hist_part->id; + } + + if (tmp_off) + table->s->versioned= false; + + DBUG_PRINT("exit",("partition: %d", *part_id)); + DBUG_RETURN(0); +} + + int get_partition_id_range(partition_info *part_info, uint32 *part_id, longlong *func_value) @@ -4654,7 +4798,8 @@ uint prep_alter_part_table(THD *thd, TABLE *table, Alter_info *alter_info, must know the number of new partitions in this case. */ if (thd->lex->no_write_to_binlog && - tab_part_info->part_type != HASH_PARTITION) + tab_part_info->part_type != HASH_PARTITION && + tab_part_info->part_type != VERSIONING_PARTITION) { my_error(ER_NO_BINLOG_ERROR, MYF(0)); goto err; @@ -4859,6 +5004,21 @@ that are reorganised. partition configuration is made. */ { + partition_element *now_part= NULL; + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + List_iterator<partition_element> it(tab_part_info->partitions); + partition_element *el; + while ((el= it++)) + { + if (el->type() == partition_element::AS_OF_NOW) + { + DBUG_ASSERT(tab_part_info->vers_info && el == tab_part_info->vers_info->now_part); + it.remove(); + now_part= el; + } + } + } List_iterator<partition_element> alt_it(alt_part_info->partitions); uint part_count= 0; do @@ -4873,6 +5033,15 @@ that are reorganised. 
} } while (++part_count < num_new_partitions); tab_part_info->num_parts+= num_new_partitions; + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + DBUG_ASSERT(now_part); + if (tab_part_info->partitions.push_back(now_part, thd->mem_root)) + { + mem_alloc_error(1); + goto err; + } + } } /* If we specify partitions explicitly we don't use defaults anymore. @@ -4906,16 +5075,28 @@ that are reorganised. List_iterator<partition_element> part_it(tab_part_info->partitions); tab_part_info->is_auto_partitioned= FALSE; - if (!(tab_part_info->part_type == RANGE_PARTITION || - tab_part_info->part_type == LIST_PARTITION)) + if (tab_part_info->part_type == VERSIONING_PARTITION) { - my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP"); - goto err; + if (num_parts_dropped >= tab_part_info->num_parts - 1) + { + DBUG_ASSERT(table && table->s && table->s->table_name.str); + my_error(ER_VERS_WRONG_PARTS, MYF(0), table->s->table_name.str); + goto err; + } } - if (num_parts_dropped >= tab_part_info->num_parts) + else { - my_error(ER_DROP_LAST_PARTITION, MYF(0)); - goto err; + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP"); + goto err; + } + if (num_parts_dropped >= tab_part_info->num_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + goto err; + } } do { @@ -4923,6 +5104,12 @@ that are reorganised. if (is_name_in_list(part_elem->partition_name, alter_info->partition_names)) { + if (part_elem->type() == partition_element::AS_OF_NOW) + { + DBUG_ASSERT(table && table->s && table->s->table_name.str); + my_error(ER_VERS_WRONG_PARTS, MYF(0), table->s->table_name.str); + goto err; + } /* Set state to indicate that the partition is to be dropped. */ @@ -5245,6 +5432,12 @@ the generated partition syntax in a correct manner. 
tab_part_info->use_default_subpartitions= FALSE; tab_part_info->use_default_num_subpartitions= FALSE; } + + if (alter_info->flags & Alter_info::ALTER_ADD_PARTITION && + tab_part_info->part_type == VERSIONING_PARTITION && + tab_part_info->vers_setup_expression(thd, alt_part_info->partitions.elements)) + goto err; + if (tab_part_info->check_partition_info(thd, (handlerton**)NULL, table->file, 0, TRUE)) { @@ -6916,6 +7109,39 @@ err: } #endif + +/* + Prepare for calling val_int on partition function by setting fields to + point to the record where the values of the PF-fields are stored. + + SYNOPSIS + set_field_ptr() + ptr Array of fields to change ptr + new_buf New record pointer + old_buf Old record pointer + + DESCRIPTION + Set ptr in field objects of field array to refer to new_buf record + instead of previously old_buf. Used before calling val_int and after + it is used to restore pointers to table->record[0]. + This routine is placed outside of partition code since it can be useful + also for other programs. +*/ + +void set_field_ptr(Field **ptr, const uchar *new_buf, + const uchar *old_buf) +{ + my_ptrdiff_t diff= (new_buf - old_buf); + DBUG_ENTER("set_field_ptr"); + + do + { + (*ptr)->move_field_offset(diff); + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + /* Prepare for calling val_int on partition function by setting fields to point to the record where the values of the PF-fields are stored. @@ -6954,6 +7180,61 @@ void set_key_field_ptr(KEY *key_info, const uchar *new_buf, } +/** + Append all fields in read_set to string + + @param[in,out] str String to append to. + @param[in] row Row to append. + @param[in] table Table containing read_set and fields for the row. 
+*/ +void append_row_to_str(String &str, const uchar *row, TABLE *table) +{ + Field **fields, **field_ptr; + const uchar *rec; + uint num_fields= bitmap_bits_set(table->read_set); + uint curr_field_index= 0; + bool is_rec0= !row || row == table->record[0]; + if (!row) + rec= table->record[0]; + else + rec= row; + + /* Create a new array of all read fields. */ + fields= (Field**) my_malloc(sizeof(void*) * (num_fields + 1), + MYF(0)); + if (!fields) + return; + fields[num_fields]= NULL; + for (field_ptr= table->field; + *field_ptr; + field_ptr++) + { + if (!bitmap_is_set(table->read_set, (*field_ptr)->field_index)) + continue; + fields[curr_field_index++]= *field_ptr; + } + + + if (!is_rec0) + set_field_ptr(fields, rec, table->record[0]); + + for (field_ptr= fields; + *field_ptr; + field_ptr++) + { + Field *field= *field_ptr; + str.append(" "); + str.append(field->field_name); + str.append(":"); + field_unpack(&str, field, rec, 0, false); + } + + if (!is_rec0) + set_field_ptr(fields, table->record[0], rec); + my_free(fields); +} + + /* SYNOPSIS mem_alloc_error() @@ -7100,6 +7381,7 @@ static void set_up_range_analysis_info(partition_info *part_info) switch (part_info->part_type) { case RANGE_PARTITION: case LIST_PARTITION: + case VERSIONING_PARTITION: if (!part_info->column_list) { if (part_info->part_expr->get_monotonicity_info() != NON_MONOTONIC) @@ -7400,7 +7682,7 @@ int get_part_iter_for_interval_cols_via_map(partition_info *part_info, uint full_length= 0; DBUG_ENTER("get_part_iter_for_interval_cols_via_map"); - if (part_info->part_type == RANGE_PARTITION) + if (part_info->part_type == RANGE_PARTITION || part_info->part_type == VERSIONING_PARTITION) { get_col_endpoint= get_partition_id_cols_range_for_endpoint; part_iter->get_next= get_next_partition_id_range; @@ -7446,7 +7728,7 @@ int get_part_iter_for_interval_cols_via_map(partition_info *part_info, } if (flags & NO_MAX_RANGE) { - if (part_info->part_type == RANGE_PARTITION) + if (part_info->part_type == 
RANGE_PARTITION || part_info->part_type == VERSIONING_PARTITION) part_iter->part_nums.end= part_info->num_parts; else /* LIST_PARTITION */ { @@ -8143,4 +8425,52 @@ uint get_partition_field_store_length(Field *field) store_length+= HA_KEY_BLOB_LENGTH; return store_length; } + +// FIXME: duplicate of ha_partition::set_up_table_before_create +bool set_up_table_before_create(THD *thd, + TABLE_SHARE *share, + const char *partition_name_with_path, + HA_CREATE_INFO *info, + partition_element *part_elem) +{ + bool error= false; + const char *partition_name; + DBUG_ENTER("set_up_table_before_create"); + + DBUG_ASSERT(part_elem); + + if (!part_elem) + DBUG_RETURN(true); + share->max_rows= part_elem->part_max_rows; + share->min_rows= part_elem->part_min_rows; + partition_name= strrchr(partition_name_with_path, FN_LIBCHAR); + if ((part_elem->index_file_name && + (error= append_file_to_dir(thd, + const_cast<const char**>(&part_elem->index_file_name), + partition_name+1))) || + (part_elem->data_file_name && + (error= append_file_to_dir(thd, + const_cast<const char**>(&part_elem->data_file_name), + partition_name+1)))) + { + DBUG_RETURN(error); + } + if (part_elem->index_file_name != NULL) + { + info->index_file_name= part_elem->index_file_name; + } + if (part_elem->data_file_name != NULL) + { + info->data_file_name= part_elem->data_file_name; + } + if (part_elem->tablespace_name != NULL) + { + if (check_tablespace_name(part_elem->tablespace_name) != IDENT_NAME_OK) + { + DBUG_RETURN(true); + } + info->tablespace= part_elem->tablespace_name; + } + DBUG_RETURN(error); +} #endif diff --git a/sql/sql_partition.h b/sql/sql_partition.h index 992229afb05..626ceee3f13 100644 --- a/sql/sql_partition.h +++ b/sql/sql_partition.h @@ -41,6 +41,7 @@ typedef struct st_key_range key_range; #define HA_CAN_UPDATE_PARTITION_KEY (1 << 1) #define HA_CAN_PARTITION_UNIQUE (1 << 2) #define HA_USE_AUTO_PARTITION (1 << 3) +#define HA_ONLY_VERS_PARTITION (1 << 4) #define NORMAL_PART_NAME 0 #define 
TEMP_PART_NAME 1 @@ -128,6 +129,14 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info, bool check_part_func_fields(Field **ptr, bool ok_with_charsets); bool field_is_partition_charset(Field *field); Item* convert_charset_partition_constant(Item *item, CHARSET_INFO *cs); +/** + Append all fields in read_set to string + + @param[in,out] str String to append to. + @param[in] row Row to append. + @param[in] table Table containing read_set and fields for the row. +*/ +void append_row_to_str(String &str, const uchar *row, TABLE *table); void mem_alloc_error(size_t size); void truncate_partition_filename(char *path); @@ -291,4 +300,30 @@ int __attribute__((warn_unused_result)) void set_key_field_ptr(KEY *key_info, const uchar *new_buf, const uchar *old_buf); +/** Set up table for creating a partition. +Copy info from partition to the table share so the created partition +has the correct info. + @param thd THD object + @param share Table share to be updated. + @param info Create info to be updated. + @param part_elem partition_element containing the info. 
+ + @return status + @retval TRUE Error + @retval FALSE Success + + @details + Set up + 1) Comment on partition + 2) MAX_ROWS, MIN_ROWS on partition + 3) Index file name on partition + 4) Data file name on partition +*/ +bool set_up_table_before_create(THD *thd, + TABLE_SHARE *share, + const char *partition_name_with_path, + HA_CREATE_INFO *info, + partition_element *part_elem); + #endif /* SQL_PARTITION_INCLUDED */ + diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 61e312646da..31cc6bcdd31 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -30,6 +30,7 @@ #include "sql_base.h" // tdc_remove_table, lock_table_names, #include "sql_handler.h" // mysql_ha_rm_tables #include "sql_statistics.h" +#include "vtmd.h" static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error); @@ -299,12 +300,23 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const char *new_db, LEX_CSTRING new_db_name= { new_db, strlen(new_db)}; (void) rename_table_in_stat_tables(thd, &db_name, &table_name, &new_db_name, &new_table); - if ((rc= Table_triggers_list::change_table_name(thd, ren_table->db, - old_alias, - ren_table->table_name, - new_db, - new_alias))) + VTMD_rename vtmd(*ren_table); + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) { + rc= vtmd.try_rename(thd, new_db_name, new_table); + if (rc) + goto revert_table_name; + } + rc= Table_triggers_list::change_table_name(thd, ren_table->db, + old_alias, + ren_table->table_name, + new_db, + new_alias); + if (rc) + { + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + vtmd.revert_rename(thd, new_db_name); +revert_table_name: /* We've succeeded in renaming table's .frm and in updating corresponding handler data, but have failed to update table's diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 53d445177f7..7b537bef53b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -54,12 +54,14 @@ #include "sql_statistics.h" #include "sql_cte.h" #include "sql_window.h" 
+#include "tztime.h" #include "debug_sync.h" // DEBUG_SYNC #include <m_ctype.h> #include <my_bit.h> #include <hash.h> #include <ft_global.h> +#include "sys_vars_shared.h" /* A key part number that means we're using a fulltext scan. @@ -669,6 +671,362 @@ setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array, DBUG_RETURN(res); } +bool vers_select_conds_t::init_from_sysvar(THD *thd) +{ + st_vers_current_time &in= thd->variables.vers_current_time; + type= in.type; + unit_start= UNIT_TIMESTAMP; + if (type != FOR_SYSTEM_TIME_UNSPECIFIED && type != FOR_SYSTEM_TIME_ALL) + { + DBUG_ASSERT(type == FOR_SYSTEM_TIME_AS_OF); + start= new (thd->mem_root) + Item_datetime_literal(thd, &in.ltime, TIME_SECOND_PART_DIGITS); + if (!start) + return true; + } + else + start= NULL; + end= NULL; + return false; +} + +int vers_setup_select(THD *thd, TABLE_LIST *tables, COND **where_expr, + SELECT_LEX *slex) +{ + DBUG_ENTER("vers_setup_select"); +#define newx new (thd->mem_root) + + TABLE_LIST *table; + int versioned_tables= 0; + + if (!thd->stmt_arena->is_conventional() && + !thd->stmt_arena->is_stmt_prepare() && !thd->stmt_arena->is_sp_execute()) + { + // statement is already prepared + DBUG_RETURN(0); + } + + for (table= tables; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + versioned_tables++; + else if (table->vers_conditions) + { + my_error(ER_VERSIONING_REQUIRED, MYF(0), table->alias); + DBUG_RETURN(-1); + } + } + + if (versioned_tables == 0) + DBUG_RETURN(0); + + /* For prepared statements we create items on statement arena, + because they must outlive execution phase for multiple executions. */ + Query_arena_stmt on_stmt_arena(thd); + + if (slex->saved_where) + { + DBUG_ASSERT(thd->stmt_arena->is_sp_execute()); + /* 2. 
this copy_andor_structure() is also required by the same reason */ + *where_expr= slex->saved_where->copy_andor_structure(thd); + } + else if (thd->stmt_arena->is_sp_execute()) + { + if (thd->stmt_arena->is_stmt_execute()) // SP executed second time (STMT_EXECUTED) + *where_expr= 0; + else if (*where_expr) // SP executed first time (STMT_INITIALIZED_FOR_SP) + /* 1. copy_andor_structure() is required since this andor tree + is modified later (and on shorter arena) */ + slex->saved_where= (*where_expr)->copy_andor_structure(thd); + } + + /* We have to save also non-versioned on_expr since we may have + conjuction of versioned + non-versioned */ + if (thd->stmt_arena->is_sp_execute()) + { + for (table= tables; table; table= table->next_local) + { + if (!table->table) + continue; + + if (table->saved_on_expr) // same logic as saved_where + { + if (table->on_expr) + table->on_expr= table->saved_on_expr->copy_andor_structure(thd); + else + // on_expr was moved to WHERE (see below: Add ON expression to the WHERE) + *where_expr= and_items(thd, + *where_expr, + table->saved_on_expr->copy_andor_structure(thd)); + } + else if (table->on_expr && + thd->stmt_arena->state == Query_arena::STMT_INITIALIZED_FOR_SP) + { + table->saved_on_expr= table->on_expr->copy_andor_structure(thd); + } + } + } + + SELECT_LEX *outer_slex= slex->next_select_in_list(); + // propagate derived conditions to outer SELECT_LEX + if (outer_slex && slex->vers_export_outer) + { + for (table= outer_slex->table_list.first; table; table= table->next_local) + { + if (!table->vers_conditions) + { + table->vers_conditions= slex->vers_export_outer; + table->vers_conditions.from_inner= true; + } + } + } + + for (table= tables; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + { + vers_select_conds_t &vers_conditions= table->vers_conditions; + + // propagate system_time from nearest outer SELECT_LEX + if (!vers_conditions && outer_slex && slex->vers_import_outer) + { + 
TABLE_LIST* derived= slex->master_unit()->derived; + while (outer_slex && (!derived->vers_conditions || derived->vers_conditions.from_inner)) + { + derived= outer_slex->master_unit()->derived; + outer_slex= outer_slex->next_select_in_list(); + } + if (outer_slex) + { + DBUG_ASSERT(derived); + DBUG_ASSERT(derived->vers_conditions); + vers_conditions= derived->vers_conditions; + } + } + + // propagate system_time from sysvar + if (!vers_conditions) + { + if (vers_conditions.init_from_sysvar(thd)) + DBUG_RETURN(-1); + } + + if (vers_conditions) + { + switch (slex->lock_type) + { + case TL_WRITE_ALLOW_WRITE: + case TL_WRITE_CONCURRENT_INSERT: + case TL_WRITE_DELAYED: + case TL_WRITE_DEFAULT: + case TL_WRITE_LOW_PRIORITY: + case TL_WRITE: + case TL_WRITE_ONLY: + my_error(ER_VERS_HISTORY_LOCK, MYF(0)); + DBUG_RETURN(-1); + default: + break; + } + + if (vers_conditions == FOR_SYSTEM_TIME_ALL) + continue; + } // if (vers_conditions) + + COND** dst_cond= where_expr; + if (table->on_expr) + { + dst_cond= &table->on_expr; + } + + if (TABLE_LIST *t= table->embedding) + { + if (t->on_expr) + dst_cond= &t->on_expr; + } + + const LEX_CSTRING *fstart= &table->table->vers_start_field()->field_name; + const LEX_CSTRING *fend= &table->table->vers_end_field()->field_name; + + Item *row_start= + newx Item_field(thd, &slex->context, table->db, table->alias, fstart); + Item *row_end= + newx Item_field(thd, &slex->context, table->db, table->alias, fend); + Item *row_end2= row_end; + + bool tmp_from_ib= + table->table->s->table_category == TABLE_CATEGORY_TEMPORARY && + table->table->vers_start_field()->type() == MYSQL_TYPE_LONGLONG; + bool timestamps_only= table->table->versioned_by_sql() && !tmp_from_ib; + + if (vers_conditions) + { + vers_conditions.resolve_units(timestamps_only); + if (timestamps_only) + { + if (vers_conditions.unit_start == UNIT_TRX_ID || vers_conditions.unit_end == UNIT_TRX_ID) + { + my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->table_name); + DBUG_RETURN(-1); + 
} + } + else if (thd->variables.vers_innodb_algorithm_simple) + { + DBUG_ASSERT(table->table->s && table->table->s->db_plugin); + handlerton *hton= plugin_hton(table->table->s->db_plugin); + DBUG_ASSERT(hton); + bool convert_start= false; + bool convert_end= false; + switch (vers_conditions.type) + { + case FOR_SYSTEM_TIME_AS_OF: + if (vers_conditions.unit_start == UNIT_TIMESTAMP) + convert_start= convert_end= true; + break; + case FOR_SYSTEM_TIME_BEFORE: + if (vers_conditions.unit_start == UNIT_TIMESTAMP) + convert_end= true; + break; + case FOR_SYSTEM_TIME_FROM_TO: + case FOR_SYSTEM_TIME_BETWEEN: + if (vers_conditions.unit_start == UNIT_TIMESTAMP) + convert_end= true; + if (vers_conditions.unit_end == UNIT_TIMESTAMP) + convert_start= true; + default: + break; + } + if (convert_start) + row_start= newx Item_func_vtq_ts( + thd, + hton, + row_start, + VTQ_COMMIT_TS); + if (convert_end) + row_end= newx Item_func_vtq_ts( + thd, + hton, + row_end, + VTQ_COMMIT_TS); + } + } + + Item *cond1= 0, *cond2= 0, *curr= 0; + // Temporary tables of can be created from INNODB tables and thus will + // have uint64 type of sys_trx_(start|end) field. + // They need special handling. 
+ TABLE *t= table->table; + if (tmp_from_ib || t->versioned_by_sql() || + thd->variables.vers_innodb_algorithm_simple) + { + switch (vers_conditions.type) + { + case FOR_SYSTEM_TIME_UNSPECIFIED: + if (t->vers_start_field()->real_type() != MYSQL_TYPE_LONGLONG) + { + MYSQL_TIME max_time; + thd->variables.time_zone->gmt_sec_to_TIME(&max_time, TIMESTAMP_MAX_VALUE); + max_time.second_part= TIME_MAX_SECOND_PART; + curr= newx Item_datetime_literal(thd, &max_time, + TIME_SECOND_PART_DIGITS); + cond1= newx Item_func_eq(thd, row_end, curr); + } + else + { + curr= newx Item_int(thd, ULONGLONG_MAX); + cond1= newx Item_func_eq(thd, row_end2, curr); + } + break; + case FOR_SYSTEM_TIME_AS_OF: + cond1= newx Item_func_le(thd, row_start, + vers_conditions.start); + cond2= newx Item_func_gt(thd, row_end, + vers_conditions.start); + break; + case FOR_SYSTEM_TIME_FROM_TO: + cond1= newx Item_func_lt(thd, row_start, + vers_conditions.end); + cond2= newx Item_func_ge(thd, row_end, + vers_conditions.start); + break; + case FOR_SYSTEM_TIME_BETWEEN: + cond1= newx Item_func_le(thd, row_start, + vers_conditions.end); + cond2= newx Item_func_ge(thd, row_end, + vers_conditions.start); + break; + case FOR_SYSTEM_TIME_BEFORE: + cond1= newx Item_func_lt(thd, row_end, + vers_conditions.start); + break; + default: + DBUG_ASSERT(0); + } + } + else + { + DBUG_ASSERT(table->table->s && table->table->s->db_plugin); + handlerton *hton= plugin_hton(table->table->s->db_plugin); + DBUG_ASSERT(hton); + + Item *trx_id0, *trx_id1; + + switch (vers_conditions.type) + { + case FOR_SYSTEM_TIME_UNSPECIFIED: + curr= newx Item_int(thd, ULONGLONG_MAX); + cond1= newx Item_func_eq(thd, row_end2, curr); + break; + case FOR_SYSTEM_TIME_AS_OF: + trx_id0= vers_conditions.unit_start == UNIT_TIMESTAMP ? 
+ newx Item_func_vtq_id(thd, hton, vers_conditions.start, VTQ_TRX_ID) : + vers_conditions.start; + cond1= newx Item_func_vtq_trx_sees_eq(thd, hton, trx_id0, row_start); + cond2= newx Item_func_vtq_trx_sees(thd, hton, row_end, trx_id0); + break; + case FOR_SYSTEM_TIME_FROM_TO: + case FOR_SYSTEM_TIME_BETWEEN: + trx_id0= vers_conditions.unit_start == UNIT_TIMESTAMP ? + newx Item_func_vtq_id(thd, hton, vers_conditions.start, VTQ_TRX_ID, true) : + vers_conditions.start; + trx_id1= vers_conditions.unit_end == UNIT_TIMESTAMP ? + newx Item_func_vtq_id(thd, hton, vers_conditions.end, VTQ_TRX_ID, false) : + vers_conditions.end; + cond1= vers_conditions.type == FOR_SYSTEM_TIME_FROM_TO ? + newx Item_func_vtq_trx_sees(thd, hton, trx_id1, row_start) : + newx Item_func_vtq_trx_sees_eq(thd, hton, trx_id1, row_start); + cond2= newx Item_func_vtq_trx_sees_eq(thd, hton, row_end, trx_id0); + break; + case FOR_SYSTEM_TIME_BEFORE: + trx_id0= vers_conditions.unit_start == UNIT_TIMESTAMP ? + newx Item_func_vtq_id(thd, hton, vers_conditions.start, VTQ_TRX_ID) : + vers_conditions.start; + cond1= newx Item_func_lt(thd, row_end, trx_id0); + break; + default: + DBUG_ASSERT(0); + } + } + + if (cond1) + { + cond1= and_items(thd, + *dst_cond, + and_items(thd, + cond2, + cond1)); + + if (on_stmt_arena.arena_replaced()) + *dst_cond= cond1; + else + thd->change_item_tree(dst_cond, cond1); + } + } // if (... table->table->versioned()) + } // for (table= tables; ...) + + DBUG_RETURN(0); +#undef newx +} + /***************************************************************************** Check fields, find best join, do the select and output fields. mysql_select assumes that all tables are already opened @@ -744,7 +1102,11 @@ JOIN::prepare(TABLE_LIST *tables_init, { remove_redundant_subquery_clauses(select_lex); } - + + /* System Versioning: handle FOR SYSTEM_TIME clause. 
*/ + if (vers_setup_select(thd, tables_list, &conds, select_lex) < 0) + DBUG_RETURN(-1); + /* TRUE if the SELECT list mixes elements with and without grouping, and there is no GROUP BY clause. Mixing non-aggregated fields with @@ -1026,6 +1388,46 @@ JOIN::prepare(TABLE_LIST *tables_init, if (!procedure && result && result->prepare(fields_list, unit_arg)) goto err; /* purecov: inspected */ + if (!thd->stmt_arena->is_stmt_prepare()) + { + bool have_versioned_tables= false; + for (TABLE_LIST *table= tables_list; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + { + have_versioned_tables= true; + break; + } + } + + if (have_versioned_tables) + { + Item_transformer transformer= &Item::vers_optimized_fields_transformer; + + if (conds) + { + conds= conds->transform(thd, transformer, NULL); + } + + for (ORDER *ord= order; ord; ord= ord->next) + { + ord->item_ptr= (*ord->item)->transform(thd, transformer, NULL); + ord->item= &ord->item_ptr; + } + + for (ORDER *ord= group_list; ord; ord= ord->next) + { + ord->item_ptr= (*ord->item)->transform(thd, transformer, NULL); + ord->item= &ord->item_ptr; + } + + if (having) + { + having= having->transform(thd, transformer, NULL); + } + } + } + unit= unit_arg; if (prepare_stage2()) goto err; @@ -3545,6 +3947,16 @@ void JOIN::exec_inner() result->send_result_set_metadata( procedure ? procedure_fields_list : *fields, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF); + + { + List_iterator<Item> it(*columns_list); + while (Item *item= it++) + { + Item_transformer transformer= &Item::vers_optimized_fields_transformer; + it.replace(item->transform(thd, transformer, NULL)); + } + } + error= do_select(this, procedure); /* Accumulate the counts from all join iterations of all join parts. 
*/ thd->inc_examined_row_count(join_examined_rows); @@ -16289,7 +16701,12 @@ Field *create_tmp_field_from_field(THD *thd, Field *org_field, item->result_field= new_field; else new_field->field_name= *name; - new_field->flags|= (org_field->flags & NO_DEFAULT_VALUE_FLAG); + new_field->flags|= (org_field->flags & ( + NO_DEFAULT_VALUE_FLAG | + HIDDEN_FLAG | + VERS_SYS_START_FLAG | + VERS_SYS_END_FLAG | + VERS_OPTIMIZED_UPDATE_FLAG)); if (org_field->maybe_null() || (item && item->maybe_null)) new_field->flags&= ~NOT_NULL_FLAG; // Because of outer join if (org_field->type() == MYSQL_TYPE_VAR_STRING || @@ -16884,6 +17301,8 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, List_iterator_fast<Item> li(fields); Item *item; Field **tmp_from_field=from_field; + Field *sys_trx_start= NULL; + Field *sys_trx_end= NULL; while ((item=li++)) { Item::Type type= item->type(); @@ -17006,6 +17425,31 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, continue; // Some kind of const item } DBUG_ASSERT(!new_field->field_name.str || strlen(new_field->field_name.str) == new_field->field_name.length); + + if (type == Item::FIELD_ITEM || type == Item::REF_ITEM) + { + if (item->real_item()->type() == Item::FIELD_ITEM) + { + Item_field *item_field= (Item_field *)item->real_item(); + Field *field= item_field->field; + TABLE_SHARE *s= field->table->s; + if (s->versioned) + { + if (field->flags & VERS_SYS_START_FLAG) + sys_trx_start= new_field; + else if (field->flags & VERS_SYS_END_FLAG) + sys_trx_end= new_field; + } + } + } + if (type == Item::TYPE_HOLDER) + { + Item_type_holder *ith= (Item_type_holder*)item; + if (ith->flags & VERS_SYS_START_FLAG) + sys_trx_start= new_field; + else if (ith->flags & VERS_SYS_END_FLAG) + sys_trx_end= new_field; + } if (type == Item::SUM_FUNC_ITEM) { Item_sum *agg_item= (Item_sum *) item; @@ -17086,6 +17530,21 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, total_uneven_bit_length= 0; } } + + 
if (sys_trx_start && sys_trx_end) + { + sys_trx_start->flags|= VERS_SYS_START_FLAG | HIDDEN_FLAG; + sys_trx_end->flags|= VERS_SYS_END_FLAG | HIDDEN_FLAG; + share->versioned= true; + share->field= table->field; + share->row_start_field= sys_trx_start->field_index; + share->row_end_field= sys_trx_end->field_index; + } + else + { + DBUG_ASSERT(!sys_trx_start && !sys_trx_end); + } + DBUG_ASSERT(fieldnr == (uint) (reg_field - table->field)); DBUG_ASSERT(field_count >= (uint) (reg_field - table->field)); field_count= fieldnr; @@ -25575,6 +26034,11 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, } #endif /* WITH_PARTITION_STORAGE_ENGINE */ } + if (table && table->versioned()) + { + // versioning conditions are already unwrapped to WHERE clause + str->append(" FOR SYSTEM_TIME ALL"); + } if (my_strcasecmp(table_alias_charset, cmp_name, alias)) { char t_alias_buff[MAX_ALIAS_NAME]; diff --git a/sql/sql_select.h b/sql/sql_select.h index b6b8deb99f5..b57452f8934 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -2330,4 +2330,7 @@ int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort); JOIN_TAB *first_explain_order_tab(JOIN* join); JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab); +int vers_setup_select(THD *thd, TABLE_LIST *tables, COND **where_expr, + SELECT_LEX *slex); + #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_show.cc b/sql/sql_show.cc index cd08959cc26..4b28b0d18da 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -62,6 +62,8 @@ #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" #endif +#include "vtmd.h" +#include "transaction.h" enum enum_i_s_events_fields { @@ -569,6 +571,7 @@ static struct show_privileges_st sys_privileges[]= {"Create view", "Tables", "To create new views"}, {"Create user", "Server Admin", "To create new users"}, {"Delete", "Tables", "To delete existing rows"}, + {"Delete versioning rows", "Tables", "To delete versioning table historical rows"}, {"Drop", 
"Databases,Tables", "To drop databases, tables, and views"}, #ifdef HAVE_EVENT_SCHEDULER {"Event","Server Admin","To create, alter, drop and execute events"}, @@ -1281,6 +1284,15 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list) */ MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint(); + TABLE_LIST archive; + bool versioned= table_list->vers_conditions; + if (versioned) + { + DBUG_ASSERT(table_list->vers_conditions == FOR_SYSTEM_TIME_AS_OF); + VTMD_table vtmd(*table_list); + if (vtmd.setup_select(thd)) + goto exit; + } if (mysqld_show_create_get_fields(thd, table_list, &field_list, &buffer)) goto exit; @@ -1323,6 +1335,13 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list) my_eof(thd); exit: + if (versioned) + { + /* If commit fails, we should be able to reset the OK status. */ + thd->get_stmt_da()->set_overwrite_status(true); + trans_commit_stmt(thd); + thd->get_stmt_da()->set_overwrite_status(false); + } close_thread_tables(thd); /* Release any metadata locks taken during SHOW CREATE. 
*/ thd->mdl_context.rollback_to_savepoint(mdl_savepoint); @@ -1685,6 +1704,7 @@ static bool get_field_default_value(THD *thd, Field *field, String *def_value, has_default= (field->default_value || (!(field->flags & NO_DEFAULT_VALUE_FLAG) && + !field->vers_sys_field() && field->unireg_check != Field::NEXT_NUMBER)); def_value->length(0); @@ -2004,6 +2024,7 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, TABLE *table= table_list->table; TABLE_SHARE *share= table->s; sql_mode_t sql_mode= thd->variables.sql_mode; + ulong vers_hide= thd->variables.vers_hide; bool foreign_db_mode= sql_mode & (MODE_POSTGRESQL | MODE_ORACLE | MODE_MSSQL | MODE_DB2 | MODE_MAXDB | MODE_ANSI); @@ -2043,7 +2064,7 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, alias= table_list->schema_table->table_name; else { - if (lower_case_table_names == 2) + if (lower_case_table_names == 2 || table_list->vers_force_alias) alias= table->alias.c_ptr(); else { @@ -2082,6 +2103,10 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, { uint flags = field->flags; + if (vers_hide == VERS_HIDE_FULL && + (flags & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG))) + continue; + if (ptr != table->field) packet->append(STRING_WITH_LEN(",\n")); @@ -2126,9 +2151,18 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, } else { + if (field->flags & VERS_SYS_START_FLAG) + { + packet->append(STRING_WITH_LEN(" GENERATED ALWAYS AS ROW START")); + } + else if (field->flags & VERS_SYS_END_FLAG) + { + packet->append(STRING_WITH_LEN(" GENERATED ALWAYS AS ROW END")); + } + if (flags & NOT_NULL_FLAG) packet->append(STRING_WITH_LEN(" NOT NULL")); - else if (field->type() == MYSQL_TYPE_TIMESTAMP) + else if (field->type() == MYSQL_TYPE_TIMESTAMP && !field->vers_sys_field()) { /* TIMESTAMP field require explicit NULL flag, because unlike @@ -2144,6 +2178,11 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, 
packet->append(def_value.ptr(), def_value.length(), system_charset_info); } + if (field->flags & VERS_OPTIMIZED_UPDATE_FLAG) + { + packet->append(STRING_WITH_LEN(" WITHOUT SYSTEM VERSIONING")); + } + if (!limited_mysql_mode && print_on_update_clause(field, &def_value, false)) { @@ -2235,6 +2274,17 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, hton->index_options); } + if (table->versioned() && vers_hide != VERS_HIDE_FULL) + { + const Field *fs = table->vers_start_field(); + const Field *fe = table->vers_end_field(); + packet->append(STRING_WITH_LEN(",\n PERIOD FOR SYSTEM_TIME (")); + append_identifier(thd,packet,fs->field_name.str, fs->field_name.length); + packet->append(STRING_WITH_LEN(", ")); + append_identifier(thd,packet,fe->field_name.str, fe->field_name.length); + packet->append(STRING_WITH_LEN(")")); + } + /* Get possible foreign key definitions stored in InnoDB and append them to the CREATE TABLE statement @@ -2273,6 +2323,11 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, add_table_options(thd, table, create_info_arg, table_list->schema_table != 0, 0, packet); + if (table->versioned() && vers_hide != VERS_HIDE_FULL) + { + packet->append(STRING_WITH_LEN(" WITH SYSTEM VERSIONING")); + } + #ifdef WITH_PARTITION_STORAGE_ENGINE { if (table->part_info && @@ -4184,7 +4239,7 @@ int schema_tables_add(THD *thd, Dynamic_array<LEX_CSTRING*> *files, @retval 2 Not fatal error; Safe to ignore this file list */ -static int +int make_table_name_list(THD *thd, Dynamic_array<LEX_CSTRING*> *table_names, LEX *lex, LOOKUP_FIELD_VALUES *lookup_field_vals, LEX_CSTRING *db_name) @@ -4835,6 +4890,59 @@ public: } }; +static bool get_all_archive_tables(THD *thd, + Dynamic_array<String> &all_archive_tables) +{ + if (thd->variables.vers_hide == VERS_HIDE_NEVER) + return false; + + Dynamic_array<LEX_CSTRING *> all_db; + LOOKUP_FIELD_VALUES lookup_field_values= { + {C_STRING_WITH_LEN("%")}, {NULL, 0}, true, false}; + if 
(make_db_list(thd, &all_db, &lookup_field_values)) + return true; + + LEX_STRING information_schema= {C_STRING_WITH_LEN("information_schema")}; + for (size_t i= 0; i < all_db.elements(); i++) + { + LEX_CSTRING db= *all_db.at(i); + if (db.length == information_schema.length && + !memcmp(db.str, information_schema.str, db.length)) + { + all_db.del(i); + break; + } + } + + for (size_t i= 0; i < all_db.elements(); i++) + { + LEX_CSTRING db_name= *all_db.at(i); + Dynamic_array<String> archive_tables; + if (VTMD_table::get_archive_tables(thd, db_name.str, db_name.length, + archive_tables)) + return true; + for (size_t i= 0; i < archive_tables.elements(); i++) + if (all_archive_tables.push(archive_tables.at(i))) + return true; + } + + return false; +} + +static bool is_archive_table(const Dynamic_array<String> &all_archive_tables, + const LEX_CSTRING candidate) +{ + for (size_t i= 0; i < all_archive_tables.elements(); i++) + { + const String &archive_table= all_archive_tables.at(i); + if (candidate.length == archive_table.length() && + !memcmp(candidate.str, archive_table.ptr(), candidate.length)) + { + return true; + } + } + return false; +} /** @brief Fill I_S tables whose data are retrieved @@ -4877,6 +4985,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) #endif uint table_open_method= tables->table_open_method; bool can_deadlock; + Dynamic_array<String> all_archive_tables; DBUG_ENTER("get_all_tables"); /* @@ -4939,6 +5048,10 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) if (make_db_list(thd, &db_names, &plan->lookup_field_vals)) goto err; + + if (get_all_archive_tables(thd, all_archive_tables)) + goto err; + for (size_t i=0; i < db_names.elements(); i++) { LEX_CSTRING *db_name= db_names.at(i); @@ -4964,6 +5077,9 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) LEX_CSTRING *table_name= table_names.at(i); DBUG_ASSERT(table_name->length <= NAME_LEN); + if (is_archive_table(all_archive_tables, *table_name)) + continue; + 
#ifndef NO_EMBEDDED_ACCESS_CHECKS if (!(thd->col_access & TABLE_ACLS)) { @@ -7025,6 +7141,10 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables, tmp_res.append(STRING_WITH_LEN("HASH")); table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs); break; + case VERSIONING_PARTITION: + tmp_res.length(0); + tmp_res.append(STRING_WITH_LEN("SYSTEM_TIME")); + break; default: DBUG_ASSERT(0); my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR)); @@ -7730,7 +7850,8 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list) if (!(item=new (mem_root) Item_return_date_time(thd, fields_info->field_name, field_name_length, - fields_info->field_type))) + fields_info->field_type, + fields_info->field_length))) DBUG_RETURN(0); item->decimals= fields_info->field_length; break; diff --git a/sql/sql_show.h b/sql/sql_show.h index dc2fe9738fe..ac13099ca48 100644 --- a/sql/sql_show.h +++ b/sql/sql_show.h @@ -199,6 +199,9 @@ typedef struct st_lookup_field_values bool wild_table_value; } LOOKUP_FIELD_VALUES; +int make_table_name_list(THD *thd, Dynamic_array<LEX_CSTRING *> *table_names, + LEX *lex, LOOKUP_FIELD_VALUES *lookup_field_vals, + LEX_CSTRING *db_name); /* INFORMATION_SCHEMA: Execution plan for get_all_tables() call diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 7371889cc7f..d6b4522fd50 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -55,6 +55,9 @@ #include "transaction.h" #include "sql_audit.h" #include "sql_sequence.h" +#include "tztime.h" +#include "vtmd.h" // System Versioning + #ifdef __WIN__ #include <io.h> @@ -2293,6 +2296,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, const char *db= table->db; size_t db_length= table->db_length; handlerton *table_type= 0; + VTMD_drop vtmd(*table); DBUG_PRINT("table", ("table_l: '%s'.'%s' table: %p s: %p", table->db, table->table_name, table->table, @@ -2453,6 +2457,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, else { char *end; + int 
frm_delete_error= 0; /* It could happen that table's share in the table definition cache is the only thing that keeps the engine plugin loaded @@ -2491,30 +2496,51 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, // Remove extension for delete *(end= path + path_length - reg_ext_length)= '\0'; - error= ha_delete_table(thd, table_type, path, db, table->table_name, - !dont_log_query); - - if (!error) + if ((thd->lex->sql_command == SQLCOM_DROP_TABLE || + thd->lex->sql_command == SQLCOM_CREATE_TABLE) && + thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE && + table_type && table_type != view_pseudo_hton) + { + error= vtmd.check_exists(thd); + if (error) + goto non_tmp_err; + if (!vtmd.exists) + goto drop_table; + error= mysql_rename_table(table_type, table->db, table->table_name, + table->db, vtmd.archive_name(thd), NO_FK_CHECKS); + } + else { - int frm_delete_error, trigger_drop_error= 0; - /* Delete the table definition file */ - strmov(end,reg_ext); - if (table_type && table_type != view_pseudo_hton && - table_type->discover_table) + drop_table: + error= ha_delete_table(thd, table_type, path, db, table->table_name, + !dont_log_query); + if (!error) { - /* - Table type is using discovery and may not need a .frm file. - Delete it silently if it exists - */ - (void) mysql_file_delete(key_file_frm, path, MYF(0)); - frm_delete_error= 0; + /* Delete the table definition file */ + strmov(end,reg_ext); + if (table_type && table_type != view_pseudo_hton && + table_type->discover_table) + { + /* + Table type is using discovery and may not need a .frm file. 
+ Delete it silently if it exists + */ + (void) mysql_file_delete(key_file_frm, path, MYF(0)); + } + else if (mysql_file_delete(key_file_frm, path, + MYF(MY_WME))) + { + frm_delete_error= my_errno; + DBUG_ASSERT(frm_delete_error); + } } - else - frm_delete_error= mysql_file_delete(key_file_frm, path, - MYF(MY_WME)); - if (frm_delete_error) - frm_delete_error= my_errno; - else + } + + if (!error) + { + int trigger_drop_error= 0; + + if (!frm_delete_error) { non_tmp_table_deleted= TRUE; trigger_drop_error= @@ -2527,8 +2553,21 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, else if (frm_delete_error && if_exists) thd->clear_error(); } + non_tmp_err: non_tmp_error|= MY_TEST(error); } + + if (!error && vtmd.exists) + { + enum_sql_command sql_command= thd->lex->sql_command; + thd->lex->sql_command= SQLCOM_DROP_TABLE; + error= vtmd.update(thd); + thd->lex->sql_command= sql_command; + if (error) + mysql_rename_table(table_type, table->db, vtmd.archive_name(), + table->db, table->table_name, NO_FK_CHECKS); + } + if (error) { if (wrong_tables.length()) @@ -3022,10 +3061,12 @@ void promote_first_timestamp_column(List<Create_field> *column_definitions) if (column_definition->is_timestamp_type() || // TIMESTAMP column_definition->unireg_check == Field::TIMESTAMP_OLD_FIELD) // Legacy { + DBUG_PRINT("info", ("field-ptr:%p", column_definition->field)); if ((column_definition->flags & NOT_NULL_FLAG) != 0 && // NOT NULL, column_definition->default_value == NULL && // no constant default, column_definition->unireg_check == Field::NONE && // no function default - column_definition->vcol_info == NULL) + column_definition->vcol_info == NULL && + !(column_definition->flags & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG))) // column isn't generated { DBUG_PRINT("info", ("First TIMESTAMP column '%s' was promoted to " "DEFAULT CURRENT_TIMESTAMP ON UPDATE " @@ -3336,6 +3377,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, } select_field_pos= 
alter_info->create_list.elements - select_field_count; + for (field_no=0; (sql_field=it++) ; field_no++) { /* @@ -4281,6 +4323,51 @@ bool Column_definition::sp_prepare_create_field(THD *thd, MEM_ROOT *mem_root) } +static bool +vers_prepare_keys(THD *thd, + HA_CREATE_INFO *create_info, + Alter_info *alter_info, + KEY **key_info, + uint key_count) +{ + DBUG_ASSERT(create_info->versioned()); + + const char *row_start_field= create_info->vers_info.as_row.start; + DBUG_ASSERT(row_start_field); + const char *row_end_field= create_info->vers_info.as_row.end; + DBUG_ASSERT(row_end_field); + + List_iterator<Key> key_it(alter_info->key_list); + Key *key= NULL; + while ((key=key_it++)) + { + if (key->type != Key::PRIMARY && key->type != Key::UNIQUE) + continue; + + Key_part_spec *key_part= NULL; + List_iterator<Key_part_spec> part_it(key->columns); + while ((key_part=part_it++)) + { + if (!my_strcasecmp(system_charset_info, + row_start_field, + key_part->field_name.str) || + + !my_strcasecmp(system_charset_info, + row_end_field, + key_part->field_name.str)) + break; + } + if (key_part) + continue; // Key already contains Sys_start or Sys_end + + Key_part_spec *key_part_sys_end_col= + new (thd->mem_root) Key_part_spec(&create_info->vers_info.as_row.end, 0); + key->columns.push_back(key_part_sys_end_col); + } + + return false; +} + handler *mysql_create_frm_image(THD *thd, const char *db, const char *table_name, HA_CREATE_INFO *create_info, @@ -4439,7 +4526,10 @@ handler *mysql_create_frm_image(THD *thd, part_info->part_info_string= part_syntax_buf; part_info->part_info_len= syntax_len; if ((!(engine_type->partition_flags && - engine_type->partition_flags() & HA_CAN_PARTITION)) || + ((engine_type->partition_flags() & HA_CAN_PARTITION) || + (part_info->part_type == VERSIONING_PARTITION && + engine_type->partition_flags() & HA_ONLY_VERS_PARTITION)) + )) || create_info->db_type == partition_hton) { /* @@ -4520,6 +4610,13 @@ handler *mysql_create_frm_image(THD *thd, } #endif + if 
(create_info->versioned()) + { + if(vers_prepare_keys(thd, create_info, alter_info, key_info, + *key_count)) + goto err; + } + if (mysql_prepare_create_table(thd, create_info, alter_info, &db_options, file, key_info, key_count, create_table_mode)) @@ -4977,6 +5074,7 @@ bool mysql_create_table(THD *thd, TABLE_LIST *create_table, { thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); result= 1; + goto err; } else { @@ -4985,6 +5083,20 @@ bool mysql_create_table(THD *thd, TABLE_LIST *create_table, } } + if (create_info->versioned() && + thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + VTMD_table vtmd(*create_table); + if (vtmd.update(thd)) + { + thd->variables.vers_alter_history = VERS_ALTER_HISTORY_KEEP; + mysql_rm_table_no_locks(thd, create_table, 0, 0, 0, 0, 1, 1); + thd->variables.vers_alter_history = VERS_ALTER_HISTORY_SURVIVE; + result= 1; + goto err; + } + } + err: /* In RBR we don't need to log CREATE TEMPORARY TABLE */ if (thd->is_current_stmt_binlog_format_row() && create_info->tmp_table()) @@ -5095,6 +5207,66 @@ static void make_unique_constraint_name(THD *thd, LEX_CSTRING *name, ** Alter a table definition ****************************************************************************/ +bool operator!=(const MYSQL_TIME &lhs, const MYSQL_TIME &rhs) +{ + return lhs.year != rhs.year || lhs.month != rhs.month || lhs.day != rhs.day || + lhs.hour != rhs.hour || lhs.minute != rhs.minute || + lhs.second_part != rhs.second_part || lhs.neg != rhs.neg || + lhs.time_type != rhs.time_type; +} + +// Sets sys_trx_end=MAX for rows with sys_trx_end=now(6) +static bool vers_reset_alter_copy(THD *thd, TABLE *table) +{ + const MYSQL_TIME query_start= thd->query_start_TIME(); + + READ_RECORD info; + int error= 0; + bool will_batch= false; + uint dup_key_found= 0; + if (init_read_record(&info, thd, table, NULL, NULL, 0, 1, true)) + goto err; + + will_batch= !table->file->start_bulk_update(); + + while (!(error= info.read_record())) + { + MYSQL_TIME 
current; + if (table->vers_end_field()->get_date(&current, 0)) + goto err_read_record; + if (current != query_start) + { + continue; + } + + store_record(table, record[1]); + table->vers_end_field()->set_max(); + if (will_batch) + error= table->file->ha_bulk_update_row(table->record[1], table->record[0], + &dup_key_found); + else + error= table->file->ha_update_row(table->record[1], table->record[0]); + if (error && table->file->is_fatal_error(error, HA_CHECK_ALL)) + { + table->file->print_error(error, MYF(ME_FATALERROR)); + goto err_read_record; + } + } + + if (will_batch && (error= table->file->exec_bulk_update(&dup_key_found))) + table->file->print_error(error, MYF(ME_FATALERROR)); + if (will_batch) + table->file->end_bulk_update(); + +err_read_record: + end_read_record(&info); + +err: + if (table->file->ha_external_lock(thd, F_UNLCK)) + return true; + + return error ? true : false; +} /** Rename a table. @@ -5318,6 +5490,13 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, thd->work_part_info= src_table->table->part_info->get_clone(thd); #endif + if (src_table->table->versioned() && + local_create_info.vers_info.fix_create_like(thd, &local_alter_info, + &local_create_info, src_table)) + { + goto err; + } + /* Adjust description of source table before using it for creation of target table. @@ -5330,6 +5509,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, /* Replace type of source table with one specified in the statement. */ local_create_info.options&= ~HA_LEX_CREATE_TMP_TABLE; local_create_info.options|= create_info->tmp_table(); + local_create_info.options|= create_info->options; /* Reset auto-increment counter for the new table. 
*/ local_create_info.auto_increment_value= 0; /* @@ -6214,6 +6394,8 @@ static bool fill_alter_inplace_info(THD *thd, ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_ADD_CHECK_CONSTRAINT; if (alter_info->flags & Alter_info::ALTER_DROP_CHECK_CONSTRAINT) ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_DROP_CHECK_CONSTRAINT; + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_DROP) + ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_DROP_HISTORICAL; /* If we altering table with old VARCHAR fields we will be automatically @@ -8479,18 +8661,30 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list, if (mysql_rename_table(old_db_type, alter_ctx->db, alter_ctx->table_name, alter_ctx->new_db, alter_ctx->new_alias, 0)) error= -1; - else if (Table_triggers_list::change_table_name(thd, - alter_ctx->db, - alter_ctx->alias, - alter_ctx->table_name, - alter_ctx->new_db, - alter_ctx->new_alias)) - { - (void) mysql_rename_table(old_db_type, - alter_ctx->new_db, alter_ctx->new_alias, - alter_ctx->db, alter_ctx->table_name, - NO_FK_CHECKS); - error= -1; + else + { + VTMD_rename vtmd(*table_list); + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE && + vtmd.try_rename(thd, new_db_name, new_table_name)) + { + goto revert_table_name; + } + else if (Table_triggers_list::change_table_name(thd, + alter_ctx->db, + alter_ctx->alias, + alter_ctx->table_name, + alter_ctx->new_db, + alter_ctx->new_alias)) + { + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + vtmd.revert_rename(thd, new_db_name); +revert_table_name: + (void) mysql_rename_table(old_db_type, + alter_ctx->new_db, alter_ctx->new_alias, + alter_ctx->db, alter_ctx->table_name, + NO_FK_CHECKS); + error= -1; + } } } @@ -8621,6 +8815,30 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, bool error= open_tables(thd, &table_list, &tables_opened, 0, &alter_prelocking_strategy); thd->open_options&= ~HA_OPEN_FOR_ALTER; + bool 
versioned= table_list->table && table_list->table->versioned(); + bool vers_data_mod= versioned && + thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE && + alter_info->vers_data_modifying(); + + if (vers_data_mod) + { + table_list->set_lock_type(thd, TL_WRITE); + if (thd->mdl_context.upgrade_shared_lock(table_list->table->mdl_ticket, + MDL_EXCLUSIVE, + thd->variables.lock_wait_timeout)) + { + DBUG_RETURN(true); + } + + if (table_list->table->versioned_by_engine() && + alter_info->requested_algorithm == + Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT && + !table_list->table->s->partition_info_str) + { + // Changle default ALGORITHM to COPY for INNODB + alter_info->requested_algorithm= Alter_info::ALTER_TABLE_ALGORITHM_COPY; + } + } DEBUG_SYNC(thd, "alter_opened_table"); @@ -8749,6 +8967,12 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, if (check_engine(thd, alter_ctx.new_db, alter_ctx.new_name, create_info)) DBUG_RETURN(true); + if (create_info->vers_info.check_and_fix_alter(thd, alter_info, create_info, + table->s)) + { + DBUG_RETURN(true); + } + if ((create_info->db_type != table->s->db_type() || alter_info->flags & Alter_info::ALTER_PARTITION) && !table->file->can_switch_engines()) @@ -8927,9 +9151,11 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, Upgrade from MDL_SHARED_UPGRADABLE to MDL_SHARED_NO_WRITE. Afterwards it's safe to take the table level lock. 
*/ - if (thd->mdl_context.upgrade_shared_lock(mdl_ticket, MDL_SHARED_NO_WRITE, - thd->variables.lock_wait_timeout) - || lock_tables(thd, table_list, alter_ctx.tables_opened, 0)) + if ((!vers_data_mod && + thd->mdl_context.upgrade_shared_lock( + mdl_ticket, MDL_SHARED_NO_WRITE, + thd->variables.lock_wait_timeout)) || + lock_tables(thd, table_list, alter_ctx.tables_opened, 0)) { DBUG_RETURN(true); } @@ -8991,6 +9217,7 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, handlerton *new_db_type= create_info->db_type; handlerton *old_db_type= table->s->db_type(); TABLE *new_table= NULL; + bool new_versioned= false; ha_rows copied=0,deleted=0; /* @@ -9327,6 +9554,7 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, } if (!new_table) goto err_new_table_cleanup; + new_versioned= new_table->versioned(); /* Note: In case of MERGE table, we do not attach children. We do not copy data for MERGE tables. Only the children have data. @@ -9353,7 +9581,14 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, order_num, order, &copied, &deleted, alter_info->keys_onoff, &alter_ctx)) + { + if (vers_data_mod && new_versioned && table->versioned_by_sql()) + { + // Failure of this function may result in corruption of an original table. + vers_reset_alter_copy(thd, table); + } goto err_new_table_cleanup; + } } else { @@ -9448,9 +9683,14 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, Rename the old table to temporary name to have a backup in case anything goes wrong while renaming the new table. 
*/ - char backup_name[32]; - my_snprintf(backup_name, sizeof(backup_name), "%s2-%lx-%lx", tmp_file_prefix, - current_pid, (long) thd->thread_id); + char backup_name[FN_LEN]; + if (vers_data_mod) + VTMD_table::archive_name(thd, alter_ctx.table_name, backup_name, + sizeof(backup_name)); + else + my_snprintf(backup_name, sizeof(backup_name), "%s2-%lx-%lx", + tmp_file_prefix, current_pid, thd->thread_id); + if (lower_case_table_names) my_casedn_str(files_charset_info, backup_name); if (mysql_rename_table(old_db_type, alter_ctx.db, alter_ctx.table_name, @@ -9478,6 +9718,17 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, goto err_with_mdl; } + if (vers_data_mod && new_versioned) + { + DBUG_ASSERT(alter_info && table_list); + VTMD_rename vtmd(*table_list); + bool rc= alter_info->flags & Alter_info::ALTER_RENAME ? + vtmd.try_rename(thd, alter_ctx.new_db, alter_ctx.new_alias, backup_name) : + vtmd.update(thd, backup_name); + if (rc) + goto err_after_rename; + } + // Check if we renamed the table and if so update trigger files. if (alter_ctx.is_table_renamed()) { @@ -9488,6 +9739,7 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, alter_ctx.new_db, alter_ctx.new_alias)) { +err_after_rename: // Rename succeeded, delete the new table. (void) quick_rm_table(thd, new_db_type, alter_ctx.new_db, alter_ctx.new_alias, 0); @@ -9502,7 +9754,8 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, } // ALTER TABLE succeeded, delete the backup of the old table. 
- if (quick_rm_table(thd, old_db_type, alter_ctx.db, backup_name, FN_IS_TMP)) + if (!(vers_data_mod && new_versioned) && + quick_rm_table(thd, old_db_type, alter_ctx.db, backup_name, FN_IS_TMP)) { /* The fact that deletion of the backup failed is not critical @@ -9689,6 +9942,11 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, sql_mode_t save_sql_mode= thd->variables.sql_mode; ulonglong prev_insert_id, time_to_report_progress; Field **dfield_ptr= to->default_field; + bool make_versioned= !from->versioned() && to->versioned(); + bool make_unversioned= from->versioned() && !to->versioned(); + bool keep_versioned= from->versioned() && to->versioned(); + Field *to_sys_trx_start= NULL, *to_sys_trx_end= NULL, *from_sys_trx_end= NULL; + MYSQL_TIME query_start; DBUG_ENTER("copy_data_between_tables"); /* Two or 3 stages; Sorting, copying data and update indexes */ @@ -9789,6 +10047,30 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, thd_progress_next_stage(thd); } + if (make_versioned) + { + query_start= thd->query_start_TIME(); + to_sys_trx_start= to->vers_start_field(); + to_sys_trx_end= to->vers_end_field(); + } + else if (make_unversioned) + { + from_sys_trx_end= from->vers_end_field(); + } + else if (keep_versioned) + { + to->file->vers_auto_decrement= 0xffffffffffffffff; + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + query_start= thd->query_start_TIME(); + from_sys_trx_end= from->vers_end_field(); + to_sys_trx_start= to->vers_start_field(); + } else if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_DROP) + { + from_sys_trx_end= from->vers_end_field(); + } + } + THD_STAGE_INFO(thd, stage_copy_to_tmp_table); /* Tell handler that we have values for all columns in the to table */ to->use_all_columns(); @@ -9842,6 +10124,36 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, { copy_ptr->do_copy(copy_ptr); } + + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_DROP && + from_sys_trx_end 
&& !from_sys_trx_end->is_max()) + { + continue; + } + + if (make_versioned) + { + to_sys_trx_start->set_notnull(); + to_sys_trx_start->store_time(&query_start); + to_sys_trx_end->set_max(); + } + else if (make_unversioned) + { + if (!from_sys_trx_end->is_max()) + continue; // Drop history rows. + } + else if (keep_versioned && + thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + if (!from_sys_trx_end->is_max()) + continue; // Do not copy history rows. + + store_record(from, record[1]); + from->vers_end_field()->store_time(&query_start); + from->file->ha_update_row(from->record[1], from->record[0]); + to_sys_trx_start->store_time(&query_start); + } + prev_insert_id= to->file->next_insert_id; if (to->default_field) to->update_default_fields(0, ignore); @@ -9856,7 +10168,17 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, error= 1; break; } - error=to->file->ha_write_row(to->record[0]); + if (keep_versioned && to->versioned_by_engine() && + thd->variables.vers_alter_history != VERS_ALTER_HISTORY_SURVIVE) + { + to->s->versioned= false; + } + error= to->file->ha_write_row(to->record[0]); + if (keep_versioned && to->versioned_by_engine() && + thd->variables.vers_alter_history != VERS_ALTER_HISTORY_SURVIVE) + { + to->s->versioned= true; + } to->auto_increment_field_not_null= FALSE; if (error) { diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc index 93a3007d1ea..b07ec0b418c 100644 --- a/sql/sql_tablespace.cc +++ b/sql/sql_tablespace.cc @@ -22,6 +22,70 @@ #include "sql_table.h" // write_bin_log #include "sql_class.h" // THD +/** + Check if tablespace name is valid + + @param tablespace_name Name of the tablespace + + @note Tablespace names are not reflected in the file system, so + character case conversion or consideration is not relevant. + + @note Checking for path characters or ending space is not done. + The only checks are for identifier length, both in terms of + number of characters and number of bytes. 
+ + @retval IDENT_NAME_OK Identifier name is ok (Success) + @retval IDENT_NAME_WRONG Identifier name is wrong, if length == 0 +* (ER_WRONG_TABLESPACE_NAME) + @retval IDENT_NAME_TOO_LONG Identifier name is too long if it is greater + than 64 characters (ER_TOO_LONG_IDENT) + + @note In case of IDENT_NAME_TOO_LONG or IDENT_NAME_WRONG, the function + reports an error (using my_error()). +*/ + +enum_ident_name_check check_tablespace_name(const char *tablespace_name) +{ + size_t name_length= 0; //< Length as number of bytes + size_t name_length_symbols= 0; //< Length as number of symbols + + // Name must be != NULL and length must be > 0 + if (!tablespace_name || (name_length= strlen(tablespace_name)) == 0) + { + my_error(ER_WRONG_TABLESPACE_NAME, MYF(0), tablespace_name); + return IDENT_NAME_WRONG; + } + + // If we do not have too many bytes, we must check the number of symbols, + // provided the system character set may use more than one byte per symbol. + if (name_length <= NAME_LEN && use_mb(system_charset_info)) + { + const char *name= tablespace_name; //< The actual tablespace name + const char *end= name + name_length; //< Pointer to first byte after name + + // Loop over all symbols as long as we don't have too many already + while (name != end && name_length_symbols <= NAME_CHAR_LEN) + { + int len= my_ismbchar(system_charset_info, name, end); + if (len) + name += len; + else + name++; + + name_length_symbols++; + } + } + + if (name_length_symbols > NAME_CHAR_LEN || name_length > NAME_LEN) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), tablespace_name); + return IDENT_NAME_TOO_LONG; + } + + return IDENT_NAME_OK; +} + + int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info) { int error= HA_ADMIN_NOT_IMPLEMENTED; diff --git a/sql/sql_tablespace.h b/sql/sql_tablespace.h index ae77d15cbcb..b97c64f7965 100644 --- a/sql/sql_tablespace.h +++ b/sql/sql_tablespace.h @@ -19,6 +19,41 @@ class THD; class st_alter_tablespace; +/** + Enumerate possible status of a 
identifier name while determining + its validity +*/ +enum enum_ident_name_check +{ + IDENT_NAME_OK, + IDENT_NAME_WRONG, + IDENT_NAME_TOO_LONG +}; + +/** + Check if tablespace name is valid + + @param tablespace_name Name of the tablespace + + @note Tablespace names are not reflected in the file system, so + character case conversion or consideration is not relevant. + + @note Checking for path characters or ending space is not done. + The only checks are for identifier length, both in terms of + number of characters and number of bytes. + + @retval IDENT_NAME_OK Identifier name is ok (Success) + @retval IDENT_NAME_WRONG Identifier name is wrong, if length == 0 + (ER_WRONG_TABLESPACE_NAME) + @retval IDENT_NAME_TOO_LONG Identifier name is too long if it is greater + than 64 characters (ER_TOO_LONG_IDENT) + + @note In case of IDENT_NAME_TOO_LONG or IDENT_NAME_WRONG, the function + reports an error (using my_error()). +*/ + +enum_ident_name_check check_tablespace_name(const char *tablespace_name); + int mysql_alter_tablespace(THD* thd, st_alter_tablespace *ts_info); #endif /* SQL_TABLESPACE_INCLUDED */ diff --git a/sql/sql_time.cc b/sql/sql_time.cc index 309ede45ecc..276540e9dba 100644 --- a/sql/sql_time.cc +++ b/sql/sql_time.cc @@ -475,6 +475,7 @@ void localtime_to_TIME(MYSQL_TIME *to, struct tm *from) to->second= (int) from->tm_sec; } + void calc_time_from_sec(MYSQL_TIME *to, long seconds, long microseconds) { long t_seconds; diff --git a/sql/sql_time.h b/sql/sql_time.h index 1832e4501ed..28a2e2f50d2 100644 --- a/sql/sql_time.h +++ b/sql/sql_time.h @@ -170,6 +170,7 @@ bool calc_time_diff(const MYSQL_TIME *l_time1, const MYSQL_TIME *l_time2, int lsign, MYSQL_TIME *l_time3, ulonglong fuzzydate); int my_time_compare(const MYSQL_TIME *a, const MYSQL_TIME *b); void localtime_to_TIME(MYSQL_TIME *to, struct tm *from); + void calc_time_from_sec(MYSQL_TIME *to, long seconds, long microseconds); uint calc_week(MYSQL_TIME *l_time, uint week_behaviour, uint *year); diff --git 
a/sql/sql_trigger.h b/sql/sql_trigger.h index 8847680c7b2..8cbb6c44c66 100644 --- a/sql/sql_trigger.h +++ b/sql/sql_trigger.h @@ -310,7 +310,7 @@ private: inline Field **TABLE::field_to_fill() { return triggers && triggers->nullable_fields() ? triggers->nullable_fields() - : field; + : non_generated_field ? non_generated_field : field; } diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index 1d6edbc5fc9..27e405cd6b9 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -27,7 +27,8 @@ #include "sql_truncate.h" #include "wsrep_mysqld.h" #include "sql_show.h" //append_identifier() - +#include "sql_select.h" +#include "sql_delete.h" /** Append a list of field names to a string. @@ -481,7 +482,6 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) DBUG_RETURN(error); } - /** Execute a TRUNCATE statement at runtime. @@ -493,13 +493,20 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) bool Sql_cmd_truncate_table::execute(THD *thd) { bool res= TRUE; - TABLE_LIST *first_table= thd->lex->select_lex.table_list.first; + TABLE_LIST *table= thd->lex->select_lex.table_list.first; DBUG_ENTER("Sql_cmd_truncate_table::execute"); - if (check_one_table_access(thd, DROP_ACL, first_table)) + if (table->vers_conditions) + { + if (check_one_table_access(thd, DELETE_VERSIONING_ROWS_ACL, table)) + DBUG_RETURN(res); + DBUG_RETURN(mysql_delete(thd, table, NULL, NULL, -1, 0, NULL)); + } + + if (check_one_table_access(thd, DROP_ACL, table)) DBUG_RETURN(res); - if (! (res= truncate_table(thd, first_table))) + if (! 
(res= truncate_table(thd, table))) my_ok(thd); DBUG_RETURN(res); diff --git a/sql/sql_type.cc b/sql/sql_type.cc index 62dcff33f1d..b23ee1b314d 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -658,7 +658,9 @@ Type_handler_hybrid_field_type::aggregate_for_comparison(const Type_handler *h) Item_result a= cmp_type(); Item_result b= h->cmp_type(); - if (a == STRING_RESULT && b == STRING_RESULT) + if (m_vers_trx_id && (a == STRING_RESULT || b == STRING_RESULT)) + m_type_handler= &type_handler_datetime; + else if (a == STRING_RESULT && b == STRING_RESULT) m_type_handler= &type_handler_long_blob; else if (a == INT_RESULT && b == INT_RESULT) m_type_handler= &type_handler_longlong; diff --git a/sql/sql_type.h b/sql/sql_type.h index d94c5a87811..1310666e667 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -2811,14 +2811,16 @@ public: class Type_handler_hybrid_field_type { const Type_handler *m_type_handler; + bool m_vers_trx_id; bool aggregate_for_min_max(const Type_handler *other); + public: Type_handler_hybrid_field_type(); Type_handler_hybrid_field_type(const Type_handler *handler) - :m_type_handler(handler) + :m_type_handler(handler), m_vers_trx_id(false) { } Type_handler_hybrid_field_type(const Type_handler_hybrid_field_type *other) - :m_type_handler(other->m_type_handler) + :m_type_handler(other->m_type_handler), m_vers_trx_id(other->m_vers_trx_id) { } const Type_handler *type_handler() const { return m_type_handler; } enum_field_types real_field_type() const diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 96f48be5ff2..e019d0f6ac4 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -44,6 +44,9 @@ // mysql_derived_filling +#include "sql_insert.h" // For vers_insert_history_row() that may be + // needed for System Versioning. + /** True if the table's input and output record buffers are comparable using compare_record(TABLE*). 
@@ -152,6 +155,17 @@ static bool check_fields(THD *thd, List<Item> &items) return FALSE; } +static bool check_has_vers_fields(List<Item> &items) +{ + List_iterator<Item> it(items); + while (Item *item= it++) + { + if (Item_field *item_field= item->field_for_view_update()) + if (!(item_field->field->flags & VERS_OPTIMIZED_UPDATE_FLAG)) + return true; + } + return false; +} /** Re-read record if more columns are needed for error message. @@ -281,6 +295,10 @@ int mysql_update(THD *thd, TABLE_LIST *update_source_table; query_plan.index= MAX_KEY; query_plan.using_filesort= FALSE; + + // For System Versioning (may need to insert new fields to a table). + ha_rows updated_sys_ver= 0; + DBUG_ENTER("mysql_update"); create_explain_query(thd->lex, thd->mem_root); @@ -351,12 +369,17 @@ int mysql_update(THD *thd, { DBUG_RETURN(1); } + bool has_vers_fields= + table->versioned() ? check_has_vers_fields(fields) : false; if (check_key_in_view(thd, table_list)) { my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE"); DBUG_RETURN(1); } + if (table->default_field) + table->mark_default_fields_for_write(false); + #ifndef NO_EMBEDDED_ACCESS_CHECKS /* Check values */ table_list->grant.want_privilege= table->grant.want_privilege= @@ -737,6 +760,11 @@ int mysql_update(THD *thd, while (!(error=info.read_record()) && !thd->killed) { + if (table->versioned() && !table->vers_end_field()->is_max()) + { + continue; + } + explain->tracker.on_record_read(); thd->inc_examined_row_count(1); if (!select || select->skip_record(thd) > 0) @@ -746,10 +774,14 @@ int mysql_update(THD *thd, explain->tracker.on_record_after_where(); store_record(table,record[1]); + if (fill_record_n_invoke_before_triggers(thd, table, fields, values, 0, TRG_EVENT_UPDATE)) break; /* purecov: inspected */ + if (has_vers_fields && table->versioned_by_sql()) + table->vers_update_fields(); + found++; if (!can_compare_record || compare_record(table)) @@ -808,19 +840,35 @@ int mysql_update(THD *thd, else { /* 
Non-batched update */ - error= table->file->ha_update_row(table->record[1], + error= table->file->ha_update_row(table->record[1], table->record[0]); } - if (!error || error == HA_ERR_RECORD_IS_THE_SAME) - { - if (error != HA_ERR_RECORD_IS_THE_SAME) - updated++; - else - error= 0; - } - else if (!ignore || + if (error == HA_ERR_RECORD_IS_THE_SAME) + { + error= 0; + } + else if (!error) + { + updated++; + + if (has_vers_fields && table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if ((error = vers_insert_history_row(table))) + { + restore_record(table, record[2]); + break; + } + restore_record(table, record[2]); + } + updated_sys_ver++; + } + } + else if (!ignore || table->file->is_fatal_error(error, HA_CHECK_ALL)) - { + { /* If (ignore && error is ignorable) we don't have to do anything; otherwise... @@ -991,6 +1039,9 @@ int mysql_update(THD *thd, else errcode= query_error_code(thd, killed_status == NOT_KILLED); + ScopedStatementReplication scoped_stmt_rpl( + table->versioned_by_engine() ? thd : NULL); + if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), transactional_table, FALSE, FALSE, errcode)) @@ -1010,9 +1061,15 @@ int mysql_update(THD *thd, if (error < 0 && !thd->lex->analyze_stmt) { char buff[MYSQL_ERRMSG_SIZE]; - my_snprintf(buff, sizeof(buff), ER_THD(thd, ER_UPDATE_INFO), (ulong) found, - (ulong) updated, - (ulong) thd->get_stmt_da()->current_statement_warn_count()); + if (!table->versioned_by_sql()) + my_snprintf(buff, sizeof(buff), ER_THD(thd, ER_UPDATE_INFO), (ulong) found, + (ulong) updated, + (ulong) thd->get_stmt_da()->current_statement_warn_count()); + else + my_snprintf(buff, sizeof(buff), + ER_THD(thd, ER_UPDATE_INFO_WITH_SYSTEM_VERSIONING), + (ulong) found, (ulong) updated, (ulong) updated_sys_ver, + (ulong) thd->get_stmt_da()->current_statement_warn_count()); my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? 
found : updated, id, buff); DBUG_PRINT("info",("%ld records updated", (long) updated)); @@ -1619,8 +1676,11 @@ multi_update::multi_update(THD *thd_arg, TABLE_LIST *table_list, tmp_tables(0), updated(0), found(0), fields(field_list), values(value_list), table_count(0), copy_field(0), handle_duplicates(handle_duplicates_arg), do_update(1), trans_safe(1), - transactional_tables(0), ignore(ignore_arg), error_handled(0), prepared(0) -{} + transactional_tables(0), ignore(ignore_arg), error_handled(0), prepared(0), + updated_sys_ver(0) +{ + has_vers_fields= check_has_vers_fields(*field_list); +} /* @@ -1871,7 +1931,7 @@ static bool safe_update_on_fly(THD *thd, JOIN_TAB *join_tab, return !is_key_used(table, table->s->primary_key, table->write_set); return TRUE; default: - break; // Avoid compler warning + break; // Avoid compiler warning } return FALSE; @@ -2096,6 +2156,11 @@ int multi_update::send_data(List<Item> ¬_used_values) if (table->status & (STATUS_NULL_ROW | STATUS_UPDATED)) continue; + if (table->versioned() && !table->vers_end_field()->is_max()) + { + continue; + } + if (table == table_to_update) { /* @@ -2108,6 +2173,7 @@ int multi_update::send_data(List<Item> ¬_used_values) table->status|= STATUS_UPDATED; store_record(table,record[1]); + if (fill_record_n_invoke_before_triggers(thd, table, *fields_for_table[offset], *values_for_table[offset], 0, @@ -2126,6 +2192,9 @@ int multi_update::send_data(List<Item> ¬_used_values) if (table->default_field && table->update_default_fields(1, ignore)) DBUG_RETURN(1); + if (has_vers_fields && table->versioned_by_sql()) + table->vers_update_fields(); + if ((error= cur_table->view_check_option(thd, ignore)) != VIEW_CHECK_OK) { @@ -2173,6 +2242,21 @@ int multi_update::send_data(List<Item> ¬_used_values) error= 0; updated--; } + else if (has_vers_fields && table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if (vers_insert_history_row(table)) + { + restore_record(table, record[2]); + 
error= 1; + break; + } + restore_record(table, record[2]); + } + updated_sys_ver++; + } /* non-transactional or transactional table got modified */ /* either multi_update class' flag is raised in its branch */ if (table->file->has_transactions()) @@ -2199,6 +2283,7 @@ int multi_update::send_data(List<Item> ¬_used_values) */ uint field_num= 0; List_iterator_fast<TABLE> tbl_it(unupdated_check_opt_tables); + /* Set first tbl = table and then tbl to tables from tbl_it */ TABLE *tbl= table; do { @@ -2261,10 +2346,6 @@ void multi_update::abort_result_set() if (do_update && table_count > 1) { /* Add warning here */ - /* - todo/fixme: do_update() is never called with the arg 1. - should it change the signature to become argless? - */ (void) do_updates(); } } @@ -2466,19 +2547,40 @@ int multi_update::do_updates() goto err2; } } - if ((local_error=table->file->ha_update_row(table->record[1], - table->record[0])) && + if (has_vers_fields && table->versioned_by_sql()) + table->vers_update_fields(); + + if ((local_error=table->file->ha_update_row(table->record[1], + table->record[0])) && local_error != HA_ERR_RECORD_IS_THE_SAME) { if (!ignore || table->file->is_fatal_error(local_error, HA_CHECK_ALL)) { err_table= table; - goto err; + goto err; } - } + } if (local_error != HA_ERR_RECORD_IS_THE_SAME) + { updated++; + + if (has_vers_fields && table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if ((local_error= vers_insert_history_row(table))) + { + restore_record(table, record[2]); + err_table = table; + goto err; + } + restore_record(table, record[2]); + } + updated_sys_ver++; + } + } else local_error= 0; } @@ -2594,9 +2696,21 @@ bool multi_update::send_eof() thd->clear_error(); else errcode= query_error_code(thd, killed_status == NOT_KILLED); - if (thd->binlog_query(THD::ROW_QUERY_TYPE, - thd->query(), thd->query_length(), - transactional_tables, FALSE, FALSE, errcode)) + + bool force_stmt= false; + for (TABLE *table= 
all_tables->table; table; table= table->next) + { + if (table->versioned_by_engine()) + { + force_stmt= true; + break; + } + } + ScopedStatementReplication scoped_stmt_rpl(force_stmt ? thd : NULL); + + if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), + thd->query_length(), transactional_tables, FALSE, + FALSE, errcode)) { local_error= 1; // Rollback update } diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 2d8129fd223..0b7f8f50ea3 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -455,6 +455,140 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, goto err; } + for (SELECT_LEX *sl= select_lex; sl; sl= sl->next_select()) + { /* System Versioning: fix system fields of versioned view */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat" +#pragma GCC diagnostic ignored "-Wformat-extra-args" + // Similar logic as in mysql_derived_prepare() + // Leading versioning table detected implicitly (first one selected) + TABLE_LIST *impli_table= NULL; + // Leading versioning table specified explicitly + // (i.e. if at least one system field is selected) + TABLE_LIST *expli_table= NULL; + const LString_i *impli_start, *impli_end; + Item_field *expli_start= NULL, *expli_end= NULL; + + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + DBUG_ASSERT(!table->is_view() || table->view); + + // Any versioned table in VIEW will add `FOR SYSTEM_TIME ALL` + WHERE: + // if there are at least one versioned table then VIEW will contain FOR_SYSTEM_TIME_ALL + // (because it is in fact LEX used to parse its SELECT). 
+ if (table->is_view() && table->view->vers_conditions == FOR_SYSTEM_TIME_ALL) + { + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW %`s is prohibited: versioned VIEW %`s in query!", MYF(0), + view->table_name, + table->table_name); + res= true; + goto err; + } + + if (!table->table || !table->table->versioned()) + continue; + + const LString_i table_start= table->table->vers_start_field()->field_name; + const LString_i table_end= table->table->vers_end_field()->field_name; + + if (!impli_table) + { + impli_table= table; + impli_start= &table_start; + impli_end= &table_end; + } + + /* Implicitly add versioning fields if needed */ + Item *item; + List_iterator_fast<Item> it(sl->item_list); + + DBUG_ASSERT(table->alias); + while ((item= it++)) + { + if (item->real_item()->type() != Item::FIELD_ITEM) + continue; + Item_field *fld= (Item_field*) item->real_item(); + if (fld->table_name && 0 != my_strcasecmp(table_alias_charset, table->alias, fld->table_name)) + continue; + DBUG_ASSERT(fld->field_name.str); + if (table_start == fld->field_name) + { + if (expli_start) + { + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW %`s is prohibited: multiple start system fields `%s.%s`, `%s.%s` in query!", MYF(0), + view->table_name, + expli_table->alias, + expli_start->field_name.str, + table->alias, + fld->field_name.str); + res= true; + goto err; + } + if (expli_table) + { + if (expli_table != table) + { +expli_table_err: + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW %`s is prohibited: system fields from multiple tables %`s, %`s in query!", MYF(0), + view->table_name, + expli_table->alias, + table->alias); + res= true; + goto err; + } + } + else + expli_table= table; + expli_start= fld; + impli_end= &table_end; + } + else if (table_end == fld->field_name) + { + if (expli_end) + { + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW %`s is prohibited: multiple end system fields `%s.%s`, `%s.%s` in query!", MYF(0), + 
view->table_name, + expli_table->alias, + expli_end->field_name.str, + table->alias, + fld->field_name.str); + res= true; + goto err; + } + if (expli_table) + { + if (expli_table != table) + goto expli_table_err; + } + else + expli_table= table; + expli_end= fld; + impli_start= &table_start; + } + } // while ((item= it++)) + } // for (TABLE_LIST *table) + + if (expli_table) + impli_table= expli_table; + + if (impli_table) + { + if (!expli_start && sl->vers_push_field(thd, impli_table, *impli_start)) + goto err; + if (!expli_end && sl->vers_push_field(thd, impli_table, *impli_end)) + goto err; + } +#pragma GCC diagnostic pop + } /* System Versioning end */ + view= lex->unlink_first_table(&link_to_local); if (check_db_dir_existence(view->db)) @@ -607,14 +741,22 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, view->table_name, item->name.str) & VIEW_ANY_ACL); - if (fld && !fld->field->table->s->tmp_table) + if (!fld) + continue; + TABLE_SHARE *s= fld->field->table->s; + const LString_i field_name= fld->field->field_name; + if (s->tmp_table || + (s->versioned && + (field_name == s->vers_start_field()->field_name || + field_name == s->vers_end_field()->field_name))) { + continue; + } - final_priv&= fld->have_privileges; + final_priv&= fld->have_privileges; - if (~fld->have_privileges & priv) - report_item= item; - } + if (~fld->have_privileges & priv) + report_item= item; } } @@ -2017,7 +2159,7 @@ bool check_key_in_view(THD *thd, TABLE_LIST *view) RETURN FALSE OK - TRUE error (is not sent to cliet) + TRUE error (is not sent to client) */ bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view) @@ -2034,7 +2176,15 @@ bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view) { Item_field *fld; if ((fld= entry->item->field_for_view_update())) + { + TABLE_SHARE *s= fld->context->table_list->table->s; + LString_i field_name= fld->field_name; + if (s->versioned && + (field_name == s->vers_start_field()->field_name || + field_name == 
s->vers_end_field()->field_name)) + continue; list->push_back(fld, thd->mem_root); + } else { my_error(ER_NON_INSERTABLE_TABLE, MYF(0), view->alias, "INSERT"); @@ -2045,7 +2195,7 @@ bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view) } /* - checking view md5 check suum + checking view md5 check sum SINOPSYS view_checksum() diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 67b73dea506..22dfacc59d8 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -67,6 +67,7 @@ #include "lex_token.h" #include "sql_lex.h" #include "sql_sequence.h" +#include "vers_utils.h" /* this is to get the bison compilation windows warnings out */ #ifdef _MSC_VER @@ -752,6 +753,7 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr) %} %union { + bool BOOL; int num; ulong ulong_num; ulonglong ulonglong_number; @@ -856,6 +858,8 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr) enum Window_frame::Frame_exclusion frame_exclusion; enum trigger_order_type trigger_action_order_type; DDL_options_st object_ddl_options; + enum vers_range_unit_t vers_range_unit; + enum Column_definition::enum_column_versioning vers_column_versioning; } %{ @@ -866,10 +870,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %parse-param { THD *thd } %lex-param { THD *thd } /* - Currently there are 102 shift/reduce conflicts. + Currently there are 115 shift/reduce conflicts. We should not introduce new conflicts any more. */ -%expect 102 +%expect 115 /* Comments for TOKENS. @@ -885,6 +889,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); INTERNAL : Not a real token, lex optimization OPERATOR : SQL operator FUTURE-USE : Reserved for future use + 32N2439 : Reserver keywords per ISO/IEC PDTR 19075-2, + http://jtc1sc32.org/doc/N2401-2450/32N2439-text_for_ballot-PDTR_19075-2.pdf + System Versioned Tables This makes the code grep-able, and helps maintenance. 
*/ @@ -1098,6 +1105,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FORCE_SYM %token FOREIGN /* SQL-2003-R */ %token FOR_SYM /* SQL-2003-R */ +%token FOR_SYSTEM_TIME_SYM /* INTERNAL */ %token FORMAT_SYM %token FOUND_SYM /* SQL-2003-R */ %token FROM @@ -1328,6 +1336,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token PARTITIONING_SYM %token PASSWORD_SYM %token PERCENT_RANK_SYM +%token PERIOD_SYM /* 32N2439 */ %token PERSISTENT_SYM %token PHASE_SYM %token PLUGINS_SYM @@ -1494,6 +1503,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SWAPS_SYM %token SWITCHES_SYM %token SYSDATE +%token SYSTEM /* 32N2439 */ +%token SYSTEM_TIME_SYM /* 32N2439 */ %token TABLES %token TABLESPACE %token TABLE_REF_PRIORITY @@ -1562,6 +1573,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token VARIANCE_SYM %token VARYING /* SQL-2003-R */ %token VAR_SAMP_SYM +%token VERSIONING_SYM /* 32N2439 */ %token VIA_SYM %token VIEW_SYM /* SQL-2003-N */ %token VIRTUAL_SYM @@ -1574,8 +1586,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WINDOW_SYM %token WHILE_SYM %token WITH /* SQL-2003-R */ +%token WITHOUT /* SQL-2003-R */ %token WITH_CUBE_SYM /* INTERNAL */ %token WITH_ROLLUP_SYM /* INTERNAL */ +%token WITH_SYSTEM_SYM /* INTERNAL */ %token WORK_SYM /* SQL-2003-N */ %token WRAPPER_SYM %token WRITE_SYM /* SQL-2003-N */ @@ -1899,12 +1913,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); keep_gcc_happy key_using_alg part_column_list + period_for_system_time server_def server_options_list server_option definer_opt no_definer definer get_diagnostics parse_vcol_expr vcol_opt_specifier vcol_opt_attribute vcol_opt_attribute_list vcol_attribute opt_serial_attribute opt_serial_attribute_list serial_attribute - explainable_command opt_lock_wait_timeout + explainable_command opt_lock_wait_timeout asrow_attribute END_OF_INPUT %type <NONE> call sp_proc_stmts 
sp_proc_stmts1 sp_proc_stmt @@ -1925,6 +1940,7 @@ END_OF_INPUT %type <num> sp_decl_idents sp_decl_idents_init_vars %type <num> sp_handler_type sp_hcond_list +%type <num> start_or_end %type <spcondvalue> sp_cond sp_hcond sqlstate signal_value opt_signal_value %type <spblock> sp_decls sp_decl sp_decl_body sp_decl_variable_list %type <spname> sp_name @@ -1960,7 +1976,6 @@ END_OF_INPUT %type <frame_exclusion> opt_window_frame_exclusion; %type <window_frame_bound> window_frame_start window_frame_bound; - %type <NONE> '-' '+' '*' '/' '%' '(' ')' ',' '!' '{' '}' '&' '|' AND_SYM OR_SYM OR_OR_SYM BETWEEN_SYM CASE_SYM @@ -1973,6 +1988,9 @@ END_OF_INPUT %type <lex_str_list> opt_with_column_list +%type <vers_range_unit> opt_trans_or_timestamp +%type <BOOL> opt_for_system_time_clause +%type <vers_column_versioning> with_or_without_system %% @@ -2508,7 +2526,7 @@ create: sequence_definition())) MYSQL_YYABORT; } - opt_sequence opt_create_table_options + opt_sequence opt_create_sequence_options { LEX *lex= thd->lex; @@ -4819,7 +4837,7 @@ create_like: opt_create_select: /* empty */ {} - | opt_duplicate opt_as create_select_query_expression + | opt_duplicate opt_as create_select_query_expression opt_versioning_option ; create_select_query_expression: @@ -4959,6 +4977,10 @@ part_type_def: { Lex->part_info->part_type= LIST_PARTITION; } | LIST_SYM part_column_list { Lex->part_info->part_type= LIST_PARTITION; } + | SYSTEM_TIME_SYM + { if (Lex->part_info->vers_init_info(thd)) MYSQL_YYABORT; } + opt_versioning_interval + opt_versioning_limit ; opt_linear: @@ -5166,6 +5188,7 @@ part_definition: MYSQL_YYABORT; } p_elem->part_state= PART_NORMAL; + p_elem->id= part_info->partitions.elements - 1; part_info->curr_part_elem= p_elem; part_info->current_partition= p_elem; part_info->use_default_partitions= FALSE; @@ -5234,6 +5257,62 @@ opt_part_values: part_info->part_type= LIST_PARTITION; } part_values_in {} + | AS OF_SYM NOW_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + 
partition_element *elem= part_info->curr_part_elem; + if (! lex->is_partition_management()) + { + if (part_info->part_type != VERSIONING_PARTITION) + my_yyabort_error((ER_PARTITION_WRONG_TYPE, MYF(0), + "BY SYSTEM_TIME")); + } + else + { + DBUG_ASSERT(Lex->create_last_non_select_table); + DBUG_ASSERT(Lex->create_last_non_select_table->table_name); + // FIXME: other ALTER commands? + my_yyabort_error((ER_VERS_WRONG_PARTS, MYF(0), + Lex->create_last_non_select_table->table_name)); + } + elem->type(partition_element::AS_OF_NOW); + DBUG_ASSERT(part_info->vers_info); + part_info->vers_info->now_part= elem; + if (part_info->init_column_part(thd)) + { + MYSQL_YYABORT; + } + } + | VERSIONING_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + partition_element *elem= part_info->curr_part_elem; + if (! lex->is_partition_management()) + { + if (part_info->part_type != VERSIONING_PARTITION) + my_yyabort_error((ER_PARTITION_WRONG_TYPE, MYF(0), + "BY SYSTEM_TIME")); + } + else + { + part_info->vers_init_info(thd); + elem->id= UINT32_MAX; + } + DBUG_ASSERT(part_info->vers_info); + if (part_info->vers_info->now_part) + { + DBUG_ASSERT(Lex->create_last_non_select_table); + DBUG_ASSERT(Lex->create_last_non_select_table->table_name); + my_yyabort_error((ER_VERS_WRONG_PARTS, MYF(0), Lex->create_last_non_select_table->table_name)); + } + elem->type(partition_element::VERSIONING); + if (part_info->init_column_part(thd)) + { + MYSQL_YYABORT; + } + } | DEFAULT { LEX *lex= Lex; @@ -5479,6 +5558,7 @@ sub_part_definition: mem_alloc_error(sizeof(partition_element)); MYSQL_YYABORT; } + sub_p_elem->id= curr_part->subpartitions.elements - 1; part_info->curr_part_elem= sub_p_elem; part_info->use_default_subpartitions= FALSE; part_info->use_default_num_subpartitions= FALSE; @@ -5535,6 +5615,38 @@ opt_part_option: { Lex->part_info->curr_part_elem->part_comment= $3.str; } ; +opt_versioning_interval: + /* empty */ {} + | INTERVAL_SYM expr interval + { + partition_info *part_info= 
Lex->part_info; + DBUG_ASSERT(part_info->part_type == VERSIONING_PARTITION); + INTERVAL interval; + if (get_interval_value($2, $3, &interval) || + part_info->vers_set_interval(interval)) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_PART_WRONG_VALUE, MYF(0), + Lex->create_last_non_select_table->table_name, "INTERVAL"); + MYSQL_YYABORT; + } + } + ; + +opt_versioning_limit: + /* empty */ {} + | LIMIT ulonglong_num + { + partition_info *part_info= Lex->part_info; + DBUG_ASSERT(part_info->part_type == VERSIONING_PARTITION); + if (part_info->vers_set_limit($2)) + { + my_error_as(ER_VERS_WRONG_PARAMS, ER_PART_WRONG_VALUE, MYF(0), + Lex->create_last_non_select_table->table_name, "LIMIT"); + MYSQL_YYABORT; + } + } + ; + /* End of partition parser part */ @@ -5642,14 +5754,19 @@ create_or_replace: } ; -opt_create_table_options: +opt_create_sequence_options: /* empty */ | create_table_options ; -create_table_options_space_separated: - create_table_option - | create_table_option create_table_options_space_separated +opt_create_table_options: + /* empty */ + | create_table_options_versioning + ; + +alter_table_options: + create_table_option_versioning + | create_table_option_versioning alter_table_options ; create_table_options: @@ -5658,6 +5775,12 @@ create_table_options: | create_table_option ',' create_table_options ; +create_table_options_versioning: + create_table_option_versioning + | create_table_option_versioning create_table_options_versioning + | create_table_option_versioning ',' create_table_options_versioning + ; + create_table_option: ENGINE_SYM opt_equal storage_engines { @@ -5901,6 +6024,28 @@ create_table_option: { Lex->create_info.used_fields|= HA_CREATE_USED_SEQUENCE; Lex->create_info.sequence= ($3 == HA_CHOICE_YES); + } + ; + +create_table_option_versioning: + create_table_option + | versioning_option + ; + +opt_versioning_option: + /* empty */ + | versioning_option + ; + +versioning_option: + WITH_SYSTEM_SYM VERSIONING_SYM + { + 
Lex->vers_get_info().with_system_versioning= true; + Lex->create_info.options|= HA_VERSIONED_TABLE; + } + | WITHOUT SYSTEM VERSIONING_SYM + { + Lex->vers_get_info().without_system_versioning= true; } ; @@ -6001,6 +6146,7 @@ field_list_item: column_def { } | key_def | constraint_def + | period_for_system_time ; column_def: @@ -6100,6 +6246,15 @@ constraint_def: } ; +period_for_system_time: + // If FOR_SYM is followed by SYSTEM_TIME_SYM then they are merged to: FOR_SYSTEM_TIME_SYM . + PERIOD_SYM FOR_SYSTEM_TIME_SYM '(' ident ',' ident ')' + { + Vers_parse_info &info= Lex->vers_get_info(); + info.set_period_for_system_time($4, $6); + } + ; + opt_check_constraint: /* empty */ { $$= (Virtual_column_info*) 0; } | check_constraint { $$= $1;} @@ -6185,6 +6340,15 @@ opt_serial_attribute_list: | serial_attribute ; +opt_asrow_attribute: + /* empty */ {} + | opt_asrow_attribute_list {} + ; + +opt_asrow_attribute_list: + opt_asrow_attribute_list asrow_attribute {} + | asrow_attribute + ; field_def: opt_attribute @@ -6194,6 +6358,44 @@ field_def: Lex->last_field->flags&= ~NOT_NULL_FLAG; // undo automatic NOT NULL for timestamps } vcol_opt_specifier vcol_opt_attribute + | opt_generated_always AS ROW_SYM start_or_end opt_asrow_attribute + { + LEX *lex= Lex; + Vers_parse_info &info= lex->vers_get_info(); + const LEX_CSTRING &field_name= lex->last_field->field_name; + + LString_i *p; + const char* clause; + switch ($4) + { + case 1: + p= &info.as_row.start; + clause= "AS ROW START"; + lex->last_field->flags|= VERS_SYS_START_FLAG; + break; + case 0: + p= &info.as_row.end; + clause= "AS ROW END"; + lex->last_field->flags|= VERS_SYS_END_FLAG; + break; + default: + /* Not Reachable */ + MYSQL_YYABORT; + break; + } + DBUG_ASSERT(p); + *p= field_name; + if (lex->last_field->implicit_not_null) + { + lex->last_field->flags&= ~NOT_NULL_FLAG; + lex->last_field->implicit_not_null= false; + } + } + ; + +start_or_end: + START_SYM { $$ = 1; } + | END { $$ = 0; } ; opt_generated_always: @@ -6426,7 
+6628,10 @@ field_type_temporal: Unless --explicit-defaults-for-timestamp is given. */ if (!opt_explicit_defaults_for_timestamp) + { Lex->last_field->flags|= NOT_NULL_FLAG; + Lex->last_field->implicit_not_null= true; + } $$.set(opt_mysql56_temporal_format ? static_cast<const Type_handler*>(&type_handler_timestamp2): static_cast<const Type_handler*>(&type_handler_timestamp), @@ -6617,7 +6822,11 @@ opt_attribute_list: ; attribute: - NULL_SYM { Lex->last_field->flags&= ~ NOT_NULL_FLAG; } + NULL_SYM + { + Lex->last_field->flags&= ~ NOT_NULL_FLAG; + Lex->last_field->implicit_not_null= false; + } | DEFAULT column_default_expr { Lex->last_field->default_value= $2; } | ON UPDATE_SYM NOW_SYM opt_default_time_precision { @@ -6653,15 +6862,35 @@ opt_compression_method: | equal ident { $$= $2.str; } ; -serial_attribute: - not NULL_SYM { Lex->last_field->flags|= NOT_NULL_FLAG; } +asrow_attribute: + not NULL_SYM + { + Lex->last_field->flags|= NOT_NULL_FLAG; + Lex->last_field->implicit_not_null= false; + } | opt_primary KEY_SYM { LEX *lex=Lex; lex->last_field->flags|= PRI_KEY_FLAG | NOT_NULL_FLAG; lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; } - | vcol_attribute + | UNIQUE_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= UNIQUE_KEY_FLAG; + lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; + } + | UNIQUE_SYM KEY_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= UNIQUE_KEY_FLAG; + lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; + } + | COMMENT_SYM TEXT_STRING_sys { Lex->last_field->comment= $2; } + ; + +serial_attribute: + asrow_attribute | IDENT_sys equal TEXT_STRING_sys { if ($3.length > ENGINE_OPTION_MAX_LENGTH) @@ -6689,6 +6918,24 @@ serial_attribute: new (thd->mem_root) engine_option_value($1, &Lex->last_field->option_list, &Lex->option_list_last); } + | with_or_without_system VERSIONING_SYM + { + Lex->last_field->versioning= $1; + Lex->create_info.options|= HA_VERSIONED_TABLE; + } + ; + +with_or_without_system: + WITH_SYSTEM_SYM + { + 
Lex->create_info.vers_info.versioned_fields= true; + $$= Column_definition::WITH_VERSIONING; + } + | WITHOUT SYSTEM + { + Lex->create_info.vers_info.unversioned_fields= true; + $$= Column_definition::WITHOUT_VERSIONING; + } ; @@ -7647,6 +7894,9 @@ alter_list_item: Lex->create_last_non_select_table= Lex->last_table(); Lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; } + | ADD period_for_system_time + { + } | add_column '(' create_field_list ')' { Lex->alter_info.flags|= Alter_info::ALTER_ADD_COLUMN | @@ -7790,7 +8040,7 @@ alter_list_item: MYSQL_YYABORT; Lex->alter_info.flags|= Alter_info::ALTER_OPTIONS; } - | create_table_options_space_separated + | alter_table_options { LEX *lex=Lex; lex->alter_info.flags|= Alter_info::ALTER_OPTIONS; @@ -8666,6 +8916,7 @@ table_expression: opt_group_clause opt_having_clause opt_window_clause + opt_system_time_clause ; opt_table_expression: @@ -8700,6 +8951,85 @@ select_options: } ; +opt_trans_or_timestamp: + /* empty */ + { + $$ = UNIT_AUTO; + } + | TRANSACTION_SYM + { + $$ = UNIT_TRX_ID; + } + | TIMESTAMP + { + $$ = UNIT_TIMESTAMP; + } + ; + +opt_system_time_clause: + /* empty */ + {} + | SYSTEM_TIME_SYM system_time_expr + { + DBUG_ASSERT(Select); + int used= 0; + if (Lex->vers_conditions) + { + for (TABLE_LIST *table= Select->table_list.first; table; table= table->next_local) + { + if (!table->vers_conditions) + { + table->vers_conditions= Lex->vers_conditions; + used++; + } + } + if (!used) + { + my_yyabort_error((ER_VERS_UNUSED_CLAUSE, MYF(0), "SYSTEM_TIME")); + } + } + } + ; + +opt_for_system_time_clause: + /* empty */ + { + $$= false; + } + | FOR_SYSTEM_TIME_SYM system_time_expr + { + $$= true; + } + ; + +system_time_expr: + AS OF_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_AS_OF, $3, $4); + } + | AS OF_SYM NOW_SYM + { + Item *item= new (thd->mem_root) Item_func_now_local(thd, 6); + if (item == NULL) + MYSQL_YYABORT; + Lex->vers_conditions.init(FOR_SYSTEM_TIME_AS_OF, 
UNIT_TIMESTAMP, item); + } + | ALL + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_ALL); + } + | FROM opt_trans_or_timestamp simple_expr + TO_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_FROM_TO, $2, $3, $5, $6); + } + | BETWEEN_SYM opt_trans_or_timestamp simple_expr + AND_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_BETWEEN, $2, $3, $5, $6); + } + ; + select_option_list: select_option_list select_option | select_option @@ -11100,12 +11430,13 @@ table_factor: table_primary_ident: { + DBUG_ASSERT(Select); SELECT_LEX *sel= Select; sel->table_join_options= 0; } - table_ident opt_use_partition opt_table_alias opt_key_definition + table_ident opt_use_partition opt_for_system_time_clause opt_table_alias opt_key_definition { - if (!($$= Select->add_table_to_list(thd, $2, $4, + if (!($$= Select->add_table_to_list(thd, $2, $5, Select->get_table_join_options(), YYPS->m_lock_type, YYPS->m_mdl_type, @@ -11113,6 +11444,8 @@ table_primary_ident: $3))) MYSQL_YYABORT; Select->add_joined_table($$); + if ($4) + $$->vers_conditions= Lex->vers_conditions; } ; @@ -11135,11 +11468,11 @@ table_primary_ident: */ table_primary_derived: - '(' get_select_lex select_derived_union ')' opt_table_alias + '(' get_select_lex select_derived_union ')' opt_for_system_time_clause opt_table_alias { /* Use $2 instead of Lex->current_select as derived table will alter value of Lex->current_select. 
*/ - if (!($3 || $5) && $2->embedding && + if (!($3 || $6) && $2->embedding && !$2->embedding->nested_join->join_list.elements) { /* we have a derived table ($3 == NULL) but no alias, @@ -11162,7 +11495,7 @@ table_primary_derived: if (ti == NULL) MYSQL_YYABORT; if (!($$= sel->add_table_to_list(thd, - ti, $5, 0, + ti, $6, 0, TL_READ, MDL_SHARED_READ))) MYSQL_YYABORT; @@ -11170,7 +11503,7 @@ table_primary_derived: lex->pop_context(); lex->nest_level--; } - else if ($5 != NULL) + else if ($6 != NULL) { /* Tables with or without joins within parentheses cannot @@ -11194,11 +11527,13 @@ table_primary_derived: if ($$ && $$->derived && !$$->derived->first_select()->next_select()) $$->select_lex->add_where_field($$->derived->first_select()); + if ($5) + $$->vers_conditions= Lex->vers_conditions; } /* Represents derived table with WITH clause */ | '(' get_select_lex subselect_start with_clause query_expression_body - subselect_end ')' opt_table_alias + subselect_end ')' opt_for_system_time_clause opt_table_alias { LEX *lex=Lex; SELECT_LEX *sel= $2; @@ -11209,10 +11544,12 @@ table_primary_derived: $5->set_with_clause($4); lex->current_select= sel; if (!($$= sel->add_table_to_list(lex->thd, - ti, $8, 0, + ti, $9, 0, TL_READ, MDL_SHARED_READ))) MYSQL_YYABORT; sel->add_joined_table($$); + if ($8) + $$->vers_conditions= Lex->vers_conditions; } ; @@ -12810,8 +13147,17 @@ opt_delete_option: | IGNORE_SYM { Lex->ignore= 1; } ; +truncate_end: + opt_lock_wait_timeout + | TO_SYM SYSTEM_TIME_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_BEFORE, $3, $4); + Lex->last_table()->vers_conditions= Lex->vers_conditions; + } + ; + truncate: - TRUNCATE_SYM opt_table_sym + TRUNCATE_SYM { LEX* lex= Lex; lex->sql_command= SQLCOM_TRUNCATE; @@ -12822,7 +13168,7 @@ truncate: YYPS->m_lock_type= TL_WRITE; YYPS->m_mdl_type= MDL_EXCLUSIVE; } - table_name opt_lock_wait_timeout + opt_table_sym table_name truncate_end { LEX* lex= thd->lex; DBUG_ASSERT(!lex->m_sql_cmd); 
@@ -13117,13 +13463,21 @@ show_param: Lex->set_command(SQLCOM_SHOW_CREATE_DB, $3); Lex->name= $4; } - | CREATE TABLE_SYM table_ident + | CREATE TABLE_SYM table_ident opt_for_system_time_clause { LEX *lex= Lex; lex->sql_command = SQLCOM_SHOW_CREATE; if (!lex->select_lex.add_table_to_list(thd, $3, NULL,0)) MYSQL_YYABORT; lex->create_info.storage_media= HA_SM_DEFAULT; + + if (lex->vers_conditions.type != FOR_SYSTEM_TIME_UNSPECIFIED && + lex->vers_conditions.type != FOR_SYSTEM_TIME_AS_OF) + { + my_yyabort_error((ER_VERS_RANGE_PROHIBITED, MYF(0))); + } + if ($4) + Lex->last_table()->vers_conditions= Lex->vers_conditions; } | CREATE VIEW_SYM table_ident { @@ -15399,6 +15753,12 @@ set_expr_or_default: if ($$ == NULL) MYSQL_YYABORT; } + | DROP + { + $$=new (thd->mem_root) Item_string_sys(thd, "DROP", 4); + if ($$ == NULL) + MYSQL_YYABORT; + } ; /* Lock function */ @@ -15781,6 +16141,7 @@ object_privilege: | EVENT_SYM { Lex->grant |= EVENT_ACL;} | TRIGGER_SYM { Lex->grant |= TRIGGER_ACL; } | CREATE TABLESPACE { Lex->grant |= CREATE_TABLESPACE_ACL; } + | DELETE_SYM VERSIONING_SYM ROWS_SYM { Lex->grant |= DELETE_VERSIONING_ROWS_ACL; } ; opt_and: @@ -16532,9 +16893,14 @@ trigger_tail: } table_ident /* $10 */ FOR_SYM - remember_name /* $12 */ - { /* $13 */ - Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start(); + remember_name /* $13 */ + { /* $14 */ + /* + FOR token is already passed through (see 'case FOR_SYM' in sql_lex.cc), + so we use _prev() to get it back. 
+ */ + DBUG_ASSERT(YYLIP->lookahead_token >= 0); + Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start_prev(); } EACH_SYM ROW_SYM diff --git a/sql/sql_yacc_ora.yy b/sql/sql_yacc_ora.yy index db45414fd28..b8c60ff8830 100644 --- a/sql/sql_yacc_ora.yy +++ b/sql/sql_yacc_ora.yy @@ -507,6 +507,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FORCE_SYM %token FOREIGN /* SQL-2003-R */ %token FOR_SYM /* SQL-2003-R */ +%token FOR_SYSTEM_TIME_SYM /* INTERNAL */ %token FORMAT_SYM %token FOUND_SYM /* SQL-2003-R */ %token FROM @@ -737,6 +738,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token PARTITIONING_SYM %token PASSWORD_SYM %token PERCENT_RANK_SYM +%token PERIOD_SYM /* 32N2439 */ %token PERSISTENT_SYM %token PHASE_SYM %token PLUGINS_SYM @@ -903,6 +905,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SWAPS_SYM %token SWITCHES_SYM %token SYSDATE +%token SYSTEM /* 32N2439 */ +%token SYSTEM_TIME_SYM /* 32N2439 */ %token TABLES %token TABLESPACE %token TABLE_REF_PRIORITY @@ -971,6 +975,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token VARIANCE_SYM %token VARYING /* SQL-2003-R */ %token VAR_SAMP_SYM +%token VERSIONING_SYM /* 32N2439 */ %token VIA_SYM %token VIEW_SYM /* SQL-2003-N */ %token VIRTUAL_SYM @@ -983,8 +988,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WINDOW_SYM %token WHILE_SYM %token WITH /* SQL-2003-R */ +%token WITHOUT /* SQL-2003-R */ %token WITH_CUBE_SYM /* INTERNAL */ %token WITH_ROLLUP_SYM /* INTERNAL */ +%token WITH_SYSTEM_SYM /* INTERNAL */ %token WORK_SYM /* SQL-2003-N */ %token WRAPPER_SYM %token WRITE_SYM /* SQL-2003-N */ diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index e5f9be5c769..afa4e30ba81 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -387,6 +387,40 @@ static Sys_var_charptr Sys_my_bind_addr( READ_ONLY GLOBAL_VAR(my_bind_addr_str), CMD_LINE(REQUIRED_ARG), IN_FS_CHARSET, DEFAULT(0)); +static 
Sys_var_vers_asof Sys_vers_current_time( + "versioning_current_timestamp", "Default AS OF value for versioned tables", + SESSION_VAR(vers_current_time), CMD_LINE(REQUIRED_ARG, OPT_VERS_CURRENT_TIME), + IN_FS_CHARSET, DEFAULT("now")); + +static Sys_var_mybool Sys_vers_force( + "versioning_force", "Force system versioning for all created tables", + SESSION_VAR(vers_force), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static const char *vers_hide_keywords[]= {"AUTO", "IMPLICIT", "FULL", "NEVER", NullS}; +static Sys_var_enum Sys_vers_hide( + "versioning_hide", "Hide system versioning from being displayed in table info. " + "AUTO: hide implicit system fields only in non-versioned and AS OF queries; " + "IMPLICIT: hide implicit system fields in all queries; " + "FULL: hide any system fields in all queries and hide versioning info in SHOW commands; " + "NEVER: don't hide system fields", + SESSION_VAR(vers_hide), CMD_LINE(OPT_ARG), vers_hide_keywords, DEFAULT(VERS_HIDE_AUTO)); + +static Sys_var_mybool Sys_vers_innodb_algorithm_simple( + "versioning_innodb_algorithm_simple", + "Use simple algorithm of timestamp handling in InnoDB instead of TRX_SEES", + SESSION_VAR(vers_innodb_algorithm_simple), CMD_LINE(OPT_ARG), + DEFAULT(TRUE)); + +static const char *vers_alter_history_keywords[]= {"KEEP", "SURVIVE", "DROP", + NULL}; +static Sys_var_enum Sys_vers_alter_history( + "versioning_alter_history", "Versioning ALTER TABLE mode. " + "KEEP: leave historical system rows as is on ALTER TABLE; " + "SURVIVE: use DDL survival feature; " + "DROP: delete historical system rows on ALTER TABLE", + SESSION_VAR(vers_alter_history), CMD_LINE(OPT_ARG), + vers_alter_history_keywords, DEFAULT(VERS_ALTER_HISTORY_KEEP)); + static Sys_var_ulonglong Sys_binlog_cache_size( "binlog_cache_size", "The size of the transactional cache for " "updates to transactional engines for the binary log. 
" diff --git a/sql/sys_vars.ic b/sql/sys_vars.ic index 706240727c5..a302bbaa837 100644 --- a/sql/sys_vars.ic +++ b/sql/sys_vars.ic @@ -77,6 +77,11 @@ #define GET_HA_ROWS GET_ULONG #endif +// Disable warning caused by SESSION_VAR() macro +#ifdef __clang__ +#pragma clang diagnostic ignored "-Winvalid-offsetof" +#endif + /* special assert for sysvars. Tells the name of the variable, and fails even in non-debug builds. @@ -2597,3 +2602,131 @@ public: bool global_update(THD *thd, set_var *var); uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base); }; + + +class Sys_var_vers_asof: public sys_var +{ +public: + Sys_var_vers_asof(const char *name_arg, const char *comment, int flag_args, + ptrdiff_t off, size_t size, CMD_LINE getopt, enum charset_enum is_os_charset_arg, + const char *def_val, on_check_function on_check_func=0, on_update_function on_update_func=0) : + sys_var(&all_sys_vars, name_arg, comment, flag_args, off, getopt.id, getopt.arg_type, + SHOW_CHAR, (intptr) def_val, 0, VARIABLE_NOT_IN_BINLOG, on_check_func, on_update_func, 0) + { + option.var_type|= GET_STR; + if (global_update(def_val)) + { + DBUG_ASSERT(false); + } + } + + bool do_check(THD *thd, set_var *var) + { return false; } + + bool update(String &in, st_vers_current_time &out) + { + if (in.length() == 3 && 0 == my_strcasecmp(in.charset(), "ALL", in.ptr())) + { + out.type= FOR_SYSTEM_TIME_ALL; + } + else if (in.length() == 3 && 0 == my_strcasecmp(in.charset(), "NOW", in.ptr())) + { + out.type= FOR_SYSTEM_TIME_UNSPECIFIED; + } + else + { + MYSQL_TIME_STATUS status; + if (str_to_datetime(in.ptr(), in.length(), &out.ltime, flags, &status) || + out.ltime.time_type != MYSQL_TIMESTAMP_DATETIME || + (status.warnings & ~MYSQL_TIME_NOTE_TRUNCATED) != 0) + { + return true; + } + out.type= FOR_SYSTEM_TIME_AS_OF; + } + return false; + } + bool update(THD *thd, set_var *var, st_vers_current_time &out) + { + Item *item= var->value; + + switch (item->result_type()) + { + case TIME_RESULT: + { + if 
(item->get_date(&out.ltime, 0)) + break; + out.type= FOR_SYSTEM_TIME_AS_OF; + return false; + } + + case STRING_RESULT: + { + String *str= item->val_str(); + if (!str || update(*str, out)) + break; + return false; + } + default: + break; + } + String *str= item->val_str(); + const char *cstr= str ? str->c_ptr_safe() : "NULL"; + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, cstr); + return true; + } + bool global_update(const char *in) + { + String s(in, &my_charset_utf8_general_ci); + return update(s, global_var(st_vers_current_time)); + } + bool option_updated() + { + return global_update(global_var(st_vers_current_time).str_value); + } + bool global_update(THD *thd, set_var *var) + { + return update(thd, var, global_var(st_vers_current_time)); + } + bool session_update(THD *thd, set_var *var) + { + return update(thd, var, session_var(thd, st_vers_current_time)); + } + uchar *valptr(THD *thd, st_vers_current_time &val) + { + switch (val.type) + { + case FOR_SYSTEM_TIME_UNSPECIFIED: + return (uchar*) thd->strdup("NOW"); + case FOR_SYSTEM_TIME_ALL: + return (uchar*) thd->strdup("ALL"); + case FOR_SYSTEM_TIME_AS_OF: + { + uchar *buf= (uchar*) thd->alloc(MAX_DATE_STRING_REP_LENGTH); + if (buf) + { + if (!my_datetime_to_str(&val.ltime, (char*) buf, 6)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "vers_current_time", "NULL (wrong datetime)"); + return (uchar*) thd->strdup("Error: wrong datetime"); + } + } + return buf; + } + default: + break; + } + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "vers_current_time", "NULL (wrong range type)"); + return (uchar*) thd->strdup("Error: wrong range type"); + } + void session_save_default(THD *thd, set_var *var) + { DBUG_ASSERT(false); } + void global_save_default(THD *thd, set_var *var) + { DBUG_ASSERT(false); } + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, session_var(thd, st_vers_current_time)); } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return 
valptr(thd, global_var(st_vers_current_time)); } + uchar *default_value_ptr(THD *thd) + { return (uchar *)option.def_value; } +}; diff --git a/sql/table.cc b/sql/table.cc index 6d32440bb11..6155af12182 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -420,6 +420,9 @@ void TABLE_SHARE::destroy() DBUG_ENTER("TABLE_SHARE::destroy"); DBUG_PRINT("info", ("db: %s table: %s", db.str, table_name.str)); + if (versioned) + vers_destroy(); + if (ha_share) { delete ha_share; @@ -1191,6 +1194,12 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, uint len; uint ext_key_parts= 0; plugin_ref se_plugin= 0; + const uchar *system_period= 0; + bool vtmd_used= false; + share->vtmd= false; + const uchar *extra2_field_flags= 0; + size_t extra2_field_flags_length= 0; + MEM_ROOT *old_root= thd->mem_root; Virtual_column_info **table_check_constraints; DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image"); @@ -1226,7 +1235,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, if (*extra2 != '/') // old frm had '/' there { const uchar *e2end= extra2 + len; - while (extra2 + 3 < e2end) + while (extra2 + 3 <= e2end) { uchar type= *extra2++; size_t length= *extra2++; @@ -1284,6 +1293,25 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, } #endif /*HAVE_SPATIAL*/ break; + case EXTRA2_PERIOD_FOR_SYSTEM_TIME: + if (system_period || length != 2 * sizeof(uint16)) + goto err; + system_period = extra2; + break; + case EXTRA2_FIELD_FLAGS: + if (extra2_field_flags) + goto err; + extra2_field_flags= extra2; + extra2_field_flags_length= length; + break; + case EXTRA2_VTMD: + if (vtmd_used) + goto err; + share->vtmd= *extra2; + if (share->vtmd) + share->table_category= TABLE_CATEGORY_LOG; + vtmd_used= true; + break; default: /* abort frm parsing if it's an unknown but important extra2 value */ if (type >= EXTRA2_ENGINE_IMPORTANT) @@ -1602,6 +1630,8 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, disk_buff= frm_image + pos + 
FRM_FORMINFO_SIZE; share->fields= uint2korr(forminfo+258); + if (extra2_field_flags && extra2_field_flags_length != share->fields) + goto err; pos= uint2korr(forminfo+260); /* Length of all screens */ n_length= uint2korr(forminfo+268); interval_count= uint2korr(forminfo+270); @@ -1733,6 +1763,27 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, strpos, vcol_screen_pos); } + /* Set system versioning information. */ + if (system_period == NULL) + { + versioned= false; + row_start_field = 0; + row_end_field = 0; + } + else + { + DBUG_PRINT("info", ("Setting system versioning informations")); + uint16 row_start= uint2korr(system_period); + uint16 row_end= uint2korr(system_period + sizeof(uint16)); + if (row_start >= share->fields || row_end >= share->fields) + goto err; + DBUG_PRINT("info", ("Columns with system versioning: [%d, %d]", row_start, row_end)); + versioned= true; + vers_init(); + row_start_field= row_start; + row_end_field= row_end; + } // if (system_period == NULL) + for (i=0 ; i < share->fields; i++, strpos+=field_pack_length, field_ptr++) { uint pack_flag, interval_nr, unireg_type, recpos, field_length; @@ -1747,6 +1798,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, uint gis_length, gis_decimals, srid= 0; Field::utype unireg_check; const Type_handler *handler; + uint32 flags= 0; if (new_frm_ver >= 3) { @@ -1956,6 +2008,14 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, swap_variables(uint, null_bit_pos, mysql57_vcol_null_bit_pos); } + if (versioned) + { + if (i == row_start_field) + flags|= VERS_SYS_START_FLAG; + else if (i == row_end_field) + flags|= VERS_SYS_END_FLAG; + } + /* Convert pre-10.2.2 timestamps to use Field::default_value */ unireg_check= (Field::utype) MTYP_TYPENR(unireg_type); name.str= fieldnames.type_names[i]; @@ -1967,7 +2027,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, null_pos, null_bit_pos, pack_flag, handler, charset, geom_type, srid, 
unireg_check, (interval_nr ? share->intervals+interval_nr-1 : NULL), - &name); + &name, flags); if (!reg_field) // Not supported field type goto err; @@ -1983,6 +2043,15 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, reg_field->field_index= i; reg_field->comment=comment; reg_field->vcol_info= vcol_info; + reg_field->flags|= flags; + if (extra2_field_flags) + { + uchar flags= *extra2_field_flags++; + if (flags & VERS_OPTIMIZED_UPDATE) + reg_field->flags|= VERS_OPTIMIZED_UPDATE_FLAG; + if (flags & HIDDEN) + reg_field->flags|= HIDDEN_FLAG; + } if (field_type == MYSQL_TYPE_BIT && !f_bit_as_char(pack_flag)) { null_bits_are_used= 1; @@ -2553,19 +2622,21 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, bitmap_clear_all(share->check_set); } - delete handler_file; #ifndef DBUG_OFF if (use_hash) (void) my_hash_check(&share->name_hash); #endif share->db_plugin= se_plugin; + delete handler_file; + share->error= OPEN_FRM_OK; thd->status_var.opened_shares++; thd->mem_root= old_root; DBUG_RETURN(0); - err: +err: + share->db_plugin= NULL; share->error= OPEN_FRM_CORRUPTED; share->open_errno= my_errno; delete handler_file; @@ -3073,25 +3144,30 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, records=0; if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) records=1; - if (prgflag & (READ_ALL+EXTRA_RECORD)) + if (prgflag & (READ_ALL + EXTRA_RECORD)) + { records++; - - if (!(record= (uchar*) alloc_root(&outparam->mem_root, - share->rec_buff_length * records))) - goto err; /* purecov: inspected */ + if (share->versioned) + records++; + } if (records == 0) { /* We are probably in hard repair, and the buffers should not be used */ - outparam->record[0]= outparam->record[1]= share->default_values; + record= share->default_values; } else { - outparam->record[0]= record; - if (records > 1) - outparam->record[1]= record+ share->rec_buff_length; - else - outparam->record[1]= outparam->record[0]; // Safety + if 
(!(record= (uchar*) alloc_root(&outparam->mem_root, + share->rec_buff_length * records))) + goto err; /* purecov: inspected */ + } + + for (i= 0; i < 3;) + { + outparam->record[i]= record; + if (++i < records) + record+= share->rec_buff_length; } if (!(field_ptr = (Field **) alloc_root(&outparam->mem_root, @@ -3116,6 +3192,26 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, } (*field_ptr)= 0; // End marker + if (share->versioned) + { + Field **fptr = NULL; + if (!(fptr = (Field **) alloc_root(&outparam->mem_root, + (uint) ((share->fields+1)* + sizeof(Field*))))) + goto err; + + outparam->non_generated_field = fptr; + for (i=0 ; i < share->fields; i++) + { + if (outparam->field[i]->vers_sys_field()) + continue; + *fptr++ = outparam->field[i]; + } + (*fptr)= 0; // End marker + } + else + outparam->non_generated_field= NULL; + if (share->found_next_number_field) outparam->found_next_number_field= outparam->field[(uint) (share->found_next_number_field - share->field)]; @@ -3207,6 +3303,7 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, } #ifdef WITH_PARTITION_STORAGE_ENGINE + bool work_part_info_used; if (share->partition_info_str_len && outparam->file) { /* @@ -3227,7 +3324,6 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, thd->set_n_backup_active_arena(&part_func_arena, &backup_arena); thd->stmt_arena= &part_func_arena; bool tmp; - bool work_part_info_used; tmp= mysql_unpack_partition(thd, share->partition_info_str, share->partition_info_str_len, @@ -3387,6 +3483,38 @@ partititon_err: if (share->no_replicate || !binlog_filter->db_ok(share->db.str)) share->can_do_row_logging= 0; // No row based replication +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (outparam->part_info && + outparam->part_info->part_type == VERSIONING_PARTITION) + { + Query_arena *backup_stmt_arena_ptr= thd->stmt_arena; + Query_arena backup_arena; + Query_arena part_func_arena(&outparam->mem_root, + 
Query_arena::STMT_INITIALIZED); + if (!work_part_info_used) + { + thd->set_n_backup_active_arena(&part_func_arena, &backup_arena); + thd->stmt_arena= &part_func_arena; + } + + bool err= outparam->part_info->vers_setup_stats(thd, is_create_table); + + if (!work_part_info_used) + { + thd->stmt_arena= backup_stmt_arena_ptr; + thd->restore_active_arena(&part_func_arena, &backup_arena); + } + + if (err) + { + outparam->file->ha_close(); + error= OPEN_FRM_OPEN_ERROR; + error_reported= true; + goto err; + } + } +#endif + /* Increment the opened_tables counter, only when open flags set. */ if (db_stat) thd->status_var.opened_tables++; @@ -4634,16 +4762,19 @@ bool TABLE_LIST::create_field_translation(THD *thd) */ if (is_view() && get_unit()->prepared && !field_translation_updated) { + field_translation_updated= TRUE; + if (field_translation_end - field_translation < select->item_list.elements) + goto allocate; while ((item= it++)) { field_translation[field_count++].item= item; } - field_translation_updated= TRUE; } DBUG_RETURN(FALSE); } +allocate: arena= thd->activate_stmt_arena_if_needed(&backup); /* Create view fields translation table */ @@ -6285,6 +6416,15 @@ void TABLE::mark_columns_needed_for_delete() if (need_signal) file->column_bitmaps_signal(); + + /* + For System Versioning we have to write and read Sys_end. + */ + if (s->versioned) + { + bitmap_set_bit(read_set, s->vers_end_field()->field_index); + bitmap_set_bit(write_set, s->vers_end_field()->field_index); + } } @@ -6361,6 +6501,15 @@ void TABLE::mark_columns_needed_for_update() need_signal= true; } } + /* + For System Versioning we have to read all columns since we will store + a copy of previous row with modified Sys_end column back to a table. + */ + if (s->versioned) + { + // We will copy old columns to a new row. 
+ use_all_columns(); + } if (check_constraints) { mark_check_constraint_columns_for_read(); @@ -7546,6 +7695,41 @@ int TABLE::update_default_fields(bool update_command, bool ignore_errors) DBUG_RETURN(res); } +void TABLE::vers_update_fields() +{ + DBUG_ENTER("vers_update_fields"); + + bitmap_set_bit(write_set, vers_start_field()->field_index); + bitmap_set_bit(write_set, vers_end_field()->field_index); + + if (vers_start_field()->set_time()) + DBUG_ASSERT(0); + vers_end_field()->set_max(); + + DBUG_VOID_RETURN; +} + + +bool TABLE_LIST::vers_vtmd_name(String& out) const +{ + static const char *vtmd_suffix= "_vtmd"; + static const size_t vtmd_suffix_len= strlen(vtmd_suffix); + if (table_name_length > NAME_CHAR_LEN - vtmd_suffix_len) + { + my_printf_error(ER_VERS_VTMD_ERROR, "Table name is longer than %d characters", MYF(0), int(NAME_CHAR_LEN - vtmd_suffix_len)); + return true; + } + out.set(table_name, table_name_length, table_alias_charset); + if (out.append(vtmd_suffix, vtmd_suffix_len + 1)) + { + my_message(ER_VERS_VTMD_ERROR, "Failed allocate VTMD name", MYF(0)); + return true; + } + out.length(out.length() - 1); + return false; +} + + /** Reset markers that fields are being updated */ @@ -8291,6 +8475,24 @@ LEX_CSTRING *fk_option_name(enum_fk_option opt) return names + opt; } +void vers_select_conds_t::resolve_units(bool timestamps_only) +{ + DBUG_ASSERT(type != FOR_SYSTEM_TIME_UNSPECIFIED); + DBUG_ASSERT(start); + if (unit_start == UNIT_AUTO) + { + unit_start= (!timestamps_only && (start->result_type() == INT_RESULT || + start->result_type() == REAL_RESULT)) ? + UNIT_TRX_ID : UNIT_TIMESTAMP; + } + if (end && unit_end == UNIT_AUTO) + { + unit_end= (!timestamps_only && (end->result_type() == INT_RESULT || + end->result_type() == REAL_RESULT)) ? 
+ UNIT_TRX_ID : UNIT_TIMESTAMP; + } +} + Field *TABLE::find_field_by_name(LEX_CSTRING *str) const { diff --git a/sql/table.h b/sql/table.h index 94a161eefba..9c4f6cb6527 100644 --- a/sql/table.h +++ b/sql/table.h @@ -561,6 +561,11 @@ struct TABLE_STATISTICS_CB bool histograms_are_read; }; +class Vers_min_max_stats; + +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif /** This structure is shared between different table objects. There is one @@ -737,6 +742,57 @@ struct TABLE_SHARE #endif /** + System versioning support. + */ + + bool versioned; + bool vtmd; + uint16 row_start_field; + uint16 row_end_field; + uint32 hist_part_id; + Vers_min_max_stats** stat_trx; + ulonglong stat_serial; // guards check_range_constants() updates + + bool busy_rotation; + mysql_mutex_t LOCK_rotation; + mysql_cond_t COND_rotation; + mysql_rwlock_t LOCK_stat_serial; + + void vers_init() + { + hist_part_id= UINT32_MAX; + busy_rotation= false; + stat_trx= NULL; + stat_serial= 0; + mysql_mutex_init(key_TABLE_SHARE_LOCK_rotation, &LOCK_rotation, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_TABLE_SHARE_COND_rotation, &COND_rotation, NULL); + mysql_rwlock_init(key_rwlock_LOCK_stat_serial, &LOCK_stat_serial); + } + + void vers_destroy() + { + mysql_mutex_destroy(&LOCK_rotation); + mysql_cond_destroy(&COND_rotation); + mysql_rwlock_destroy(&LOCK_stat_serial); + } + + Field *vers_start_field() + { + return field[row_start_field]; + } + + Field *vers_end_field() + { + return field[row_end_field]; + } + + void vers_wait_rotation() + { + while (busy_rotation) + mysql_cond_wait(&COND_rotation, &LOCK_rotation); + } + + /** Cache the checked structure of this table. 
The pointer data is used to describe the structure that @@ -1046,7 +1102,7 @@ public: uint32 instance; /** Table cache instance this TABLE is belonging to */ THD *in_use; /* Which thread uses this */ - uchar *record[2]; /* Pointer to records */ + uchar *record[3]; /* Pointer to records */ uchar *write_row_record; /* Used as optimisation in THD::write_row */ uchar *insert_values; /* used by INSERT ... UPDATE */ @@ -1079,6 +1135,8 @@ public: Field **default_field; /* Fields with non-constant DEFAULT */ Field *next_number_field; /* Set if next_number is activated */ Field *found_next_number_field; /* Set on open */ + Field **non_generated_field; /* Like **field but without generated + fields */ Virtual_column_info **check_constraints; /* Table's triggers, 0 if there are no of them */ @@ -1426,6 +1484,7 @@ public: int update_virtual_field(Field *vf); int update_virtual_fields(handler *h, enum_vcol_update_mode update_mode); int update_default_fields(bool update, bool ignore_errors); + void vers_update_fields(); void reset_default_fields(); inline ha_rows stat_records() { return used_stat_records; } @@ -1442,6 +1501,59 @@ public: bool with_cleanup); Field *find_field_by_name(LEX_CSTRING *str) const; bool export_structure(THD *thd, class Row_definition_list *defs); + + /** + System Versioning support + */ + + bool versioned() const + { + DBUG_ASSERT(s); + return s->versioned; + } + + /* Versioned by SQL layer */ + bool versioned_by_sql() const + { + DBUG_ASSERT(s && file); + return s->versioned && !file->native_versioned(); + } + + bool versioned_by_engine() const + { + DBUG_ASSERT(s && file); + return s->versioned && file->native_versioned(); + } + + bool vers_vtmd() const + { + DBUG_ASSERT(s); + return s->versioned && s->vtmd; + } + + Field *vers_start_field() const + { + DBUG_ASSERT(s && s->versioned); + return field[s->row_start_field]; + } + + Field *vers_end_field() const + { + DBUG_ASSERT(s && s->versioned); + return field[s->row_end_field]; + } + + int 
delete_row(); + +/** Number of additional fields used in versioned tables */ +#define VERSIONING_FIELDS 2 + + uint vers_user_fields() const + { + return s->versioned ? + s->fields - VERSIONING_FIELDS : + s->fields; + } }; @@ -1729,6 +1841,62 @@ class Item_in_subselect; 4) jtbm semi-join (jtbm_subselect != NULL) */ +enum vers_range_unit_t +{ + UNIT_AUTO = 0, + UNIT_TIMESTAMP, + UNIT_TRX_ID +}; + +/** last_leaf_for_name_resolutioning support. */ +struct vers_select_conds_t +{ + vers_range_type_t type; + vers_range_unit_t unit_start, unit_end; + bool import_outer:1; + bool from_inner:1; + Item *start, *end; + + void empty() + { + type= FOR_SYSTEM_TIME_UNSPECIFIED; + unit_start= unit_end= UNIT_AUTO; + import_outer= from_inner= false; + start= end= NULL; + } + + void init( + vers_range_type_t t, + vers_range_unit_t u_start= UNIT_AUTO, + Item * s= NULL, + vers_range_unit_t u_end= UNIT_AUTO, + Item * e= NULL) + { + type= t; + unit_start= u_start; + unit_end= u_end; + start= s; + end= e; + import_outer= from_inner= false; + } + + bool init_from_sysvar(THD *thd); + + bool operator== (vers_range_type_t b) + { + return type == b; + } + bool operator!= (vers_range_type_t b) + { + return type != b; + } + operator bool() const + { + return type != FOR_SYSTEM_TIME_UNSPECIFIED; + } + void resolve_units(bool timestamps_only); +}; + struct LEX; class Index_hint; struct TABLE_LIST @@ -1794,6 +1962,7 @@ struct TABLE_LIST const char *db, *alias, *table_name, *schema_table_name; const char *option; /* Used by cache index */ Item *on_expr; /* Used with outer join */ + Item *saved_on_expr; /* Used with SP and System Versioning */ Item *sj_on_expr; /* @@ -2186,6 +2355,12 @@ struct TABLE_LIST TABLE_LIST *find_underlying_table(TABLE *table); TABLE_LIST *first_leaf_for_name_resolution(); TABLE_LIST *last_leaf_for_name_resolution(); + + /* System Versioning */ + vers_select_conds_t vers_conditions; + bool vers_vtmd_name(String &out) const; + bool vers_force_alias; + /** @brief Find the bottom 
in the chain of embedded table VIEWs. diff --git a/sql/tztime.h b/sql/tztime.h index eb7d85c48b2..d3f19fa2fd3 100644 --- a/sql/tztime.h +++ b/sql/tztime.h @@ -89,6 +89,7 @@ extern my_time_t sec_since_epoch_TIME(MYSQL_TIME *t); static const int MY_TZ_TABLES_COUNT= 4; +extern Time_zone* thd_get_timezone(THD* thd); #endif /* !defined(TESTTIME) && !defined(TZINFO2SQL) */ #endif /* TZTIME_INCLUDED */ diff --git a/sql/unireg.cc b/sql/unireg.cc index 1ca0233552e..e8d94741800 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -87,6 +87,46 @@ static uchar *extra2_write(uchar *pos, enum extra2_frm_value_type type, return extra2_write(pos, type, reinterpret_cast<LEX_CSTRING *>(str)); } +static const bool ROW_START = true; +static const bool ROW_END = false; + +inline +uint16 +vers_get_field(HA_CREATE_INFO *create_info, List<Create_field> &create_fields, bool row_start) +{ + DBUG_ASSERT(create_info->versioned()); + + List_iterator<Create_field> it(create_fields); + Create_field *sql_field = NULL; + + const LString_i row_field= row_start ? create_info->vers_info.as_row.start + : create_info->vers_info.as_row.end; + DBUG_ASSERT(row_field); + + for (unsigned field_no = 0; (sql_field = it++); ++field_no) + { + if (row_field == sql_field->field_name) + { + DBUG_ASSERT(field_no <= uint16(~0U)); + return uint16(field_no); + } + } + + DBUG_ASSERT(0); /* Not Reachable */ + return 0; +} + +bool has_extra2_field_flags(List<Create_field> &create_fields) +{ + List_iterator<Create_field> it(create_fields); + while (Create_field *f= it++) + { + if (f->flags & (VERS_OPTIMIZED_UPDATE_FLAG | HIDDEN_FLAG)) + return true; + } + return false; +} + /** Create a frm (table definition) file @@ -219,6 +259,22 @@ LEX_CUSTRING build_frm_image(THD *thd, const char *table, if (gis_extra2_len) extra2_size+= 1 + (gis_extra2_len > 255 ? 
3 : 1) + gis_extra2_len; + if (create_info->versioned()) + { + extra2_size+= 1 + 1 + 2 * sizeof(uint16); + } + + if (create_info->vtmd()) + { + extra2_size+= 1 + 1 + 1; + } + + bool has_extra2_field_flags_= has_extra2_field_flags(create_fields); + if (has_extra2_field_flags_) + { + extra2_size+= + 1 + (create_fields.elements <= 255 ? 1 : 3) + create_fields.elements; + } key_buff_length= uint4korr(fileinfo+47); @@ -275,6 +331,39 @@ LEX_CUSTRING build_frm_image(THD *thd, const char *table, } #endif /*HAVE_SPATIAL*/ + if (create_info->versioned()) + { + *pos++= EXTRA2_PERIOD_FOR_SYSTEM_TIME; + *pos++= 2 * sizeof(uint16); + int2store(pos, vers_get_field(create_info, create_fields, ROW_START)); + pos+= sizeof(uint16); + int2store(pos, vers_get_field(create_info, create_fields, ROW_END)); + pos+= sizeof(uint16); + } + + if (create_info->vtmd()) + { + *pos++= EXTRA2_VTMD; + *pos++= 1; + *pos++= 1; + } + + if (has_extra2_field_flags_) + { + *pos++= EXTRA2_FIELD_FLAGS; + pos= extra2_write_len(pos, create_fields.elements); + List_iterator<Create_field> it(create_fields); + while (Create_field *field= it++) + { + uchar flags= 0; + if (field->flags & VERS_OPTIMIZED_UPDATE_FLAG) + flags|= VERS_OPTIMIZED_UPDATE; + if (field->flags & HIDDEN_FLAG) + flags|= HIDDEN; + *pos++= flags; + } + } + int4store(pos, filepos); // end of the extra2 segment pos+= 4; @@ -960,7 +1049,8 @@ static bool make_empty_rec(THD *thd, uchar *buff, uint table_options, field->unireg_check, field->save_interval ? 
field->save_interval : field->interval, - &field->field_name); + &field->field_name, + field->flags); if (!regfield) { error= 1; diff --git a/sql/unireg.h b/sql/unireg.h index b0cfb3841ef..25cb22207ad 100644 --- a/sql/unireg.h +++ b/sql/unireg.h @@ -172,12 +172,20 @@ enum extra2_frm_value_type { EXTRA2_TABLEDEF_VERSION=0, EXTRA2_DEFAULT_PART_ENGINE=1, EXTRA2_GIS=2, + EXTRA2_PERIOD_FOR_SYSTEM_TIME=4, + EXTRA2_FIELD_FLAGS=8, + EXTRA2_VTMD=16, #define EXTRA2_ENGINE_IMPORTANT 128 EXTRA2_ENGINE_TABLEOPTS=128, }; +enum extra2_field_flags { + VERS_OPTIMIZED_UPDATE=1, + HIDDEN=2, +}; + int rea_create_table(THD *thd, LEX_CUSTRING *frm, const char *path, const char *db, const char *table_name, HA_CREATE_INFO *create_info, handler *file, diff --git a/sql/vers_string.h b/sql/vers_string.h new file mode 100644 index 00000000000..6d501e1b81c --- /dev/null +++ b/sql/vers_string.h @@ -0,0 +1,133 @@ +#ifndef VERS_STRING_INCLUDED +#define VERS_STRING_INCLUDED + +struct Compare_strncmp +{ + int operator()(const LEX_CSTRING& a, const LEX_CSTRING& b) const + { + return strncmp(a.str, b.str, a.length); + } + static CHARSET_INFO* charset() + { + return system_charset_info; + } +}; + +template <CHARSET_INFO* &CS= system_charset_info> +struct Compare_my_strcasecmp +{ + int operator()(const LEX_CSTRING& a, const LEX_CSTRING& b) const + { + DBUG_ASSERT(a.str[a.length] == 0 && b.str[b.length] == 0); + return my_strcasecmp(CS, a.str, b.str); + } + static CHARSET_INFO* charset() + { + return CS; + } +}; + +typedef Compare_my_strcasecmp<files_charset_info> Compare_fs; +typedef Compare_my_strcasecmp<table_alias_charset> Compare_t; + +template <class Storage= LEX_CSTRING> +struct LEX_STRING_u : public Storage +{ + LEX_STRING_u() + { + Storage::str= NULL; + Storage::length= 0; + } + LEX_STRING_u(const char *_str, uint32 _len, CHARSET_INFO *) + { + Storage::str= _str; + Storage::length= _len; + } + uint32 length() const + { + return Storage::length; + } + const char *ptr() const + { + return 
Storage::str; + } + void set(const char *_str, uint32 _len, CHARSET_INFO *) + { + Storage::str= _str; + Storage::length= _len; + } + const LEX_CSTRING& lex_cstring() const + { + return *this; + } + const LEX_STRING& lex_string() const + { + return *(LEX_STRING *)this; + } +}; + +template <class Compare= Compare_strncmp, class Storage= LEX_STRING_u<> > +struct XString : public Storage +{ +public: + XString() {} + XString(const char *_str, size_t _len) : + Storage(_str, _len, Compare::charset()) + { + } + XString(const LEX_STRING src) : + Storage(src.str, src.length, Compare::charset()) + { + } + XString(const LEX_CSTRING src) : + Storage(src.str, src.length, Compare::charset()) + { + } + XString(const char *_str) : + Storage(_str, strlen(_str), Compare::charset()) + { + } + bool operator== (const XString& b) const + { + return Storage::length() == b.length() && 0 == Compare()(this->lex_cstring(), b.lex_cstring()); + } + bool operator!= (const XString& b) const + { + return !(*this == b); + } + operator const char* () const + { + return Storage::ptr(); + } + operator LEX_CSTRING& () const + { + return this->lex_cstring(); + } + operator LEX_STRING () const + { + LEX_STRING res; + res.str= const_cast<char *>(this->ptr()); + res.length= this->length(); + return res; + } + operator bool () const + { + return Storage::ptr() != NULL; + } +}; + +typedef XString<> LString; +typedef XString<Compare_fs> LString_fs; +typedef XString<Compare_my_strcasecmp<> > LString_i; + +typedef XString<Compare_strncmp, String> SString; +typedef XString<Compare_fs, String> SString_fs; +typedef XString<Compare_t, String> SString_t; + + +#define XSTRING_WITH_LEN(X) (X).ptr(), (X).length() +#define DB_WITH_LEN(X) (X).db, (X).db_length +#define TABLE_NAME_WITH_LEN(X) (X).table_name, (X).table_name_length + + +#endif // VERS_STRING_INCLUDED diff --git a/sql/vers_utils.h b/sql/vers_utils.h new file mode 100644 index 00000000000..948139bfa9b --- /dev/null +++ b/sql/vers_utils.h @@ -0,0 +1,68 @@ 
+#ifndef VERS_UTILS_INCLUDED
+#define VERS_UTILS_INCLUDED
+
+#include "table.h"
+#include "sql_class.h"
+#include "vers_string.h"
+
+/**
+  Scope guard for an exclusive, explicitly released metadata lock on a table.
+
+  The constructor initialises table.mdl_request (MDL_key::TABLE,
+  MDL_EXCLUSIVE, MDL_EXPLICIT) and tries to acquire it, waiting at most
+  thd->variables.lock_wait_timeout. The destructor releases the ticket and
+  clears it in the request, but only if the acquisition succeeded.
+
+  Callers must check acquire_error() before relying on the lock.
+*/
+class MDL_auto_lock
+{
+  THD *thd;
+  TABLE_LIST &table;
+  bool error;   // result of acquire_lock(); when true the lock is not held
+
+  /*
+    A copy would make two guards release the same ticket, so copying is
+    declared but never defined (same convention as key_buf_t in vtmd.h).
+  */
+  MDL_auto_lock(const MDL_auto_lock&);             // disabled
+  MDL_auto_lock& operator= (const MDL_auto_lock&); // disabled
+
+public:
+  MDL_auto_lock(THD *_thd, TABLE_LIST &_table) :
+    thd(_thd), table(_table)
+  {
+    DBUG_ASSERT(thd);
+    table.mdl_request.init(MDL_key::TABLE, table.db, table.table_name, MDL_EXCLUSIVE, MDL_EXPLICIT);
+    error= thd->mdl_context.acquire_lock(&table.mdl_request, thd->variables.lock_wait_timeout);
+  }
+  ~MDL_auto_lock()
+  {
+    if (!error)
+    {
+      DBUG_ASSERT(table.mdl_request.ticket);
+      thd->mdl_context.release_lock(table.mdl_request.ticket);
+      table.mdl_request.ticket= NULL;
+    }
+  }
+  /** @return true if the lock could not be acquired. */
+  bool acquire_error() const { return error; }
+};
+
+
+/**
+  Temporary diagnostics area for a nested operation.
+
+  The constructor remembers the statement DA currently installed in thd and
+  installs this object instead. finish() puts the saved DA back; it runs
+  from the destructor unless it was already called explicitly (saved_da is
+  then NULL).
+*/
+class Local_da : public Diagnostics_area
+{
+  THD *thd;
+  uint sql_error;               // if non-zero, replaces the recorded errno in finish()
+  Diagnostics_area *saved_da;   // DA to restore; NULL once finish() has run
+
+public:
+  Local_da(THD *_thd, uint _sql_error= 0) :
+    Diagnostics_area(_thd->query_id, false, true),
+    thd(_thd),
+    sql_error(_sql_error),
+    saved_da(_thd->get_stmt_da())
+  {
+    thd->set_stmt_da(this);
+  }
+  ~Local_da()
+  {
+    if (saved_da)
+      finish();
+  }
+  void finish()
+  {
+    DBUG_ASSERT(saved_da && thd);
+    thd->set_stmt_da(saved_da);
+    if (is_error())
+      my_error(sql_error ? 
sql_error : sql_errno(), MYF(0), message()); + if (warn_count() > error_count()) + saved_da->copy_non_errors_from_wi(thd, get_warning_info()); + saved_da= NULL; + } +}; + + +#endif // VERS_UTILS_INCLUDED diff --git a/sql/vtmd.cc b/sql/vtmd.cc new file mode 100644 index 00000000000..b761ed54281 --- /dev/null +++ b/sql/vtmd.cc @@ -0,0 +1,687 @@ +#include "vtmd.h" +#include "sql_base.h" +#include "sql_class.h" +#include "sql_handler.h" // mysql_ha_rm_tables() +#include "sql_table.h" +#include "sql_select.h" +#include "table_cache.h" // tdc_remove_table() +#include "key.h" +#include "sql_show.h" +#include "sql_parse.h" +#include "sql_lex.h" +#include "sp_head.h" +#include "sp_rcontext.h" + +LString VERS_VTMD_TEMPLATE(C_STRING_WITH_LEN("vtmd_template")); + +bool +VTMD_table::create(THD *thd) +{ + Table_specification_st create_info; + TABLE_LIST src_table, table; + create_info.init(DDL_options_st::OPT_LIKE); + create_info.options|= HA_VTMD; + create_info.alias= vtmd_name; + table.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_name), + vtmd_name, + TL_READ); + src_table.init_one_table( + LEX_STRING_WITH_LEN(MYSQL_SCHEMA_NAME), + XSTRING_WITH_LEN(VERS_VTMD_TEMPLATE), + VERS_VTMD_TEMPLATE, + TL_READ); + + Query_tables_backup backup(thd); + thd->lex->sql_command= backup.get().sql_command; + thd->lex->add_to_query_tables(&src_table); + + MDL_auto_lock mdl_lock(thd, table); + if (mdl_lock.acquire_error()) + return true; + + Reprepare_observer *reprepare_observer= thd->m_reprepare_observer; + partition_info *work_part_info= thd->work_part_info; + thd->m_reprepare_observer= NULL; + thd->work_part_info= NULL; + bool rc= mysql_create_like_table(thd, &table, &src_table, &create_info); + thd->m_reprepare_observer= reprepare_observer; + thd->work_part_info= work_part_info; + return rc; +} + +bool +VTMD_table::find_record(ulonglong sys_trx_end, bool &found) +{ + int error; + key_buf_t key; + found= false; + + DBUG_ASSERT(vtmd.table); + + if 
(key.allocate(vtmd.table->s->max_unique_length)) + return true; + + DBUG_ASSERT(sys_trx_end); + vtmd.table->vers_end_field()->set_notnull(); + vtmd.table->vers_end_field()->store(sys_trx_end, true); + key_copy(key, vtmd.table->record[0], vtmd.table->key_info + IDX_TRX_END, 0); + + error= vtmd.table->file->ha_index_read_idx_map(vtmd.table->record[1], IDX_TRX_END, + key, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT); + if (error) + { + if (error == HA_ERR_RECORD_DELETED || error == HA_ERR_KEY_NOT_FOUND) + return false; + vtmd.table->file->print_error(error, MYF(0)); + return true; + } + + restore_record(vtmd.table, record[1]); + + found= true; + return false; +} + + +bool +VTMD_table::open(THD *thd, Local_da &local_da, bool *created) +{ + if (created) + *created= false; + + if (0 == vtmd_name.length() && about.vers_vtmd_name(vtmd_name)) + return true; + + while (true) // max 2 iterations + { + vtmd.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_name), + vtmd_name, + TL_WRITE_CONCURRENT_INSERT); + + TABLE *res= open_log_table(thd, &vtmd, &open_tables_backup); + if (res) + return false; + + if (created && !*created && local_da.is_error() && local_da.sql_errno() == ER_NO_SUCH_TABLE) + { + local_da.reset_diagnostics_area(); + if (create(thd)) + break; + *created= true; + } + else + break; + } + return true; +} + +bool +VTMD_table::update(THD *thd, const char* archive_name) +{ + bool result= true; + bool found= false; + bool created; + int error; + size_t an_len= 0; + ulonglong save_thd_options; + { + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + + save_thd_options= thd->variables.option_bits; + thd->variables.option_bits&= ~OPTION_BIN_LOG; + + if (open(thd, local_da, &created)) + goto open_error; + + if (!vtmd.table->versioned()) + { + my_message(ER_VERS_VTMD_ERROR, "VTMD is not versioned", MYF(0)); + goto quit; + } + + if (!created && find_record(ULONGLONG_MAX, found)) + goto quit; + + if ((error= vtmd.table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE))) + { + 
vtmd.table->file->print_error(error, MYF(0)); + goto quit; + } + + /* Honor next number columns if present */ + vtmd.table->next_number_field= vtmd.table->found_next_number_field; + + if (vtmd.table->s->fields != FIELD_COUNT) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` unexpected fields count: %d", MYF(0), + vtmd.table->s->db.str, vtmd.table->s->table_name.str, vtmd.table->s->fields); + goto quit; + } + + if (archive_name) + { + an_len= strlen(archive_name); + vtmd.table->field[FLD_ARCHIVE_NAME]->store(archive_name, an_len, table_alias_charset); + vtmd.table->field[FLD_ARCHIVE_NAME]->set_notnull(); + } + else + { + vtmd.table->field[FLD_ARCHIVE_NAME]->set_null(); + } + vtmd.table->field[FLD_COL_RENAMES]->set_null(); + + if (found) + { + if (thd->lex->sql_command == SQLCOM_CREATE_TABLE) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` exists and not empty!", MYF(0), + vtmd.table->s->db.str, vtmd.table->s->table_name.str); + goto quit; + } + vtmd.table->mark_columns_needed_for_update(); // not needed? 
+ if (archive_name) + { + vtmd.table->s->versioned= false; + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + vtmd.table->s->versioned= true; + + if (!error) + { + if (thd->lex->sql_command == SQLCOM_DROP_TABLE) + { + error= vtmd.table->file->ha_delete_row(vtmd.table->record[0]); + } + else + { + DBUG_ASSERT(thd->lex->sql_command == SQLCOM_ALTER_TABLE); + ulonglong sys_trx_end= (ulonglong) vtmd.table->vers_start_field()->val_int(); + store_record(vtmd.table, record[1]); + vtmd.table->field[FLD_NAME]->store(TABLE_NAME_WITH_LEN(about), system_charset_info); + vtmd.table->field[FLD_NAME]->set_notnull(); + vtmd.table->field[FLD_ARCHIVE_NAME]->set_null(); + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + if (error) + goto err; + + DBUG_ASSERT(an_len); + while (true) + { // fill archive_name of last sequential renames + bool found; + if (find_record(sys_trx_end, found)) + goto quit; + if (!found || !vtmd.table->field[FLD_ARCHIVE_NAME]->is_null()) + break; + + store_record(vtmd.table, record[1]); + vtmd.table->field[FLD_ARCHIVE_NAME]->store(archive_name, an_len, table_alias_charset); + vtmd.table->field[FLD_ARCHIVE_NAME]->set_notnull(); + vtmd.table->s->versioned= false; + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + vtmd.table->s->versioned= true; + if (error) + goto err; + sys_trx_end= (ulonglong) vtmd.table->vers_start_field()->val_int(); + } // while (true) + } // else (thd->lex->sql_command != SQLCOM_DROP_TABLE) + } // if (!error) + } // if (archive_name) + else + { + vtmd.table->field[FLD_NAME]->store(TABLE_NAME_WITH_LEN(about), system_charset_info); + vtmd.table->field[FLD_NAME]->set_notnull(); + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + } + } // if (found) + else + { + vtmd.table->field[FLD_NAME]->store(TABLE_NAME_WITH_LEN(about), system_charset_info); + vtmd.table->field[FLD_NAME]->set_notnull(); + 
vtmd.table->mark_columns_needed_for_insert(); // not needed? + error= vtmd.table->file->ha_write_row(vtmd.table->record[0]); + } + + if (error) + { +err: + vtmd.table->file->print_error(error, MYF(0)); + } + else + result= local_da.is_error(); + } + +quit: + close_log_table(thd, &open_tables_backup); + +open_error: + thd->variables.option_bits= save_thd_options; + return result; +} + +bool +VTMD_rename::move_archives(THD *thd, LString &new_db) +{ + vtmd.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_name), + vtmd_name, + TL_READ); + int error; + bool rc= false; + SString_fs archive; + bool end_keyread= false; + bool index_end= false; + Open_tables_backup open_tables_backup; + key_buf_t key; + + TABLE *res= open_log_table(thd, &vtmd, &open_tables_backup); + if (!res) + return true; + + if (key.allocate(vtmd.table->key_info[IDX_ARCHIVE_NAME].key_length)) + { + close_log_table(thd, &open_tables_backup); + return true; + } + + if ((error= vtmd.table->file->ha_start_keyread(IDX_ARCHIVE_NAME))) + goto err; + end_keyread= true; + + if ((error= vtmd.table->file->ha_index_init(IDX_ARCHIVE_NAME, true))) + goto err; + index_end= true; + + error= vtmd.table->file->ha_index_first(vtmd.table->record[0]); + while (!error) + { + if (!vtmd.table->field[FLD_ARCHIVE_NAME]->is_null()) + { + vtmd.table->field[FLD_ARCHIVE_NAME]->val_str(&archive); + key_copy(key, + vtmd.table->record[0], + &vtmd.table->key_info[IDX_ARCHIVE_NAME], + vtmd.table->key_info[IDX_ARCHIVE_NAME].key_length, + false); + error= vtmd.table->file->ha_index_read_map( + vtmd.table->record[0], + key, + vtmd.table->key_info[IDX_ARCHIVE_NAME].ext_key_part_map, + HA_READ_PREFIX_LAST); + if (!error) + { + if ((rc= move_table(thd, archive, new_db))) + break; + + error= vtmd.table->file->ha_index_next(vtmd.table->record[0]); + } + } + else + { + archive.length(0); + error= vtmd.table->file->ha_index_next(vtmd.table->record[0]); + } + } + + if (error && error != HA_ERR_END_OF_FILE) + { +err: + 
vtmd.table->file->print_error(error, MYF(0)); + rc= true; + } + + if (index_end) + vtmd.table->file->ha_index_end(); + if (end_keyread) + vtmd.table->file->ha_end_keyread(); + + close_log_table(thd, &open_tables_backup); + return rc; +} + +bool +VTMD_rename::move_table(THD *thd, SString_fs &table_name, LString &new_db) +{ + handlerton *table_hton= NULL; + if (!ha_table_exists(thd, about.db, table_name, &table_hton) || !table_hton) + { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_VERS_VTMD_ERROR, + "`%s.%s` archive doesn't exist", + about.db, table_name.ptr()); + return false; + } + + if (ha_table_exists(thd, new_db, table_name)) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` archive already exists!", MYF(0), + new_db.ptr(), table_name.ptr()); + return true; + } + + TABLE_LIST tl; + tl.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(table_name), + table_name, + TL_WRITE_ONLY); + tl.mdl_request.set_type(MDL_EXCLUSIVE); + + mysql_ha_rm_tables(thd, &tl); + if (lock_table_names(thd, &tl, 0, thd->variables.lock_wait_timeout, 0)) + return true; + tdc_remove_table(thd, TDC_RT_REMOVE_ALL, about.db, table_name, false); + + bool rc= mysql_rename_table( + table_hton, + about.db, table_name, + new_db, table_name, + NO_FK_CHECKS); + if (!rc) + query_cache_invalidate3(thd, &tl, 0); + + return rc; +} + +bool +VTMD_rename::try_rename(THD *thd, LString new_db, LString new_alias, const char *archive_name) +{ + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + TABLE_LIST new_table; + + if (check_exists(thd)) + return true; + + new_table.init_one_table( + XSTRING_WITH_LEN(new_db), + XSTRING_WITH_LEN(new_alias), + new_alias, TL_READ); + + if (new_table.vers_vtmd_name(vtmd_new_name)) + return true; + + if (ha_table_exists(thd, new_db, vtmd_new_name)) + { + if (exists) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` table already exists!", MYF(0), + new_db.ptr(), vtmd_new_name.ptr()); + return true; + } + push_warning_printf( + thd, 
Sql_condition::WARN_LEVEL_WARN,
+      ER_VERS_VTMD_ERROR,
+      "`%s.%s` table already exists!",
+      new_db.ptr(), vtmd_new_name.ptr());
+    return false;
+  }
+
+  if (!exists)
+    return false;
+
+  bool same_db= true;
+  if (LString_fs(DB_WITH_LEN(about)) != LString_fs(new_db))
+  {
+    // Move archives before VTMD so if the operation is interrupted, it could be continued.
+    if (move_archives(thd, new_db))
+      return true;
+    same_db= false;
+  }
+
+  TABLE_LIST vtmd_tl;
+  vtmd_tl.init_one_table(
+    DB_WITH_LEN(about),
+    XSTRING_WITH_LEN(vtmd_name),
+    vtmd_name,
+    TL_WRITE_ONLY);
+  vtmd_tl.mdl_request.set_type(MDL_EXCLUSIVE);
+
+  mysql_ha_rm_tables(thd, &vtmd_tl);
+  if (lock_table_names(thd, &vtmd_tl, 0, thd->variables.lock_wait_timeout, 0))
+    return true;
+  tdc_remove_table(thd, TDC_RT_REMOVE_ALL, about.db, vtmd_name, false);
+  if (local_da.is_error()) // just safety check
+    return true;
+  bool rc= mysql_rename_table(hton,
+    about.db, vtmd_name,
+    new_db, vtmd_new_name,
+    NO_FK_CHECKS);
+  if (!rc)
+  {
+    query_cache_invalidate3(thd, &vtmd_tl, 0);
+    if (same_db || archive_name || new_alias != LString(TABLE_NAME_WITH_LEN(about)))
+    {
+      local_da.finish();
+      VTMD_table new_vtmd(new_table);
+      rc= new_vtmd.update(thd, archive_name);
+    }
+  }
+  return rc;
+}
+
+/**
+  Rename the VTMD table `new_db`.`vtmd_new_name` to `new_db`.`vtmd_name`.
+
+  @param thd     current session
+  @param new_db  database holding the renamed VTMD table
+  @return true on failure (lock not obtained or rename failed)
+
+  NOTE(review): the lock request below is built from about's database
+  (DB_WITH_LEN(about)) while the cache eviction and the rename use new_db.
+  That looks inconsistent when the two databases differ -- confirm.
+*/
+bool
+VTMD_rename::revert_rename(THD *thd, LString new_db)
+{
+  DBUG_ASSERT(hton);
+  Local_da local_da(thd, ER_VERS_VTMD_ERROR);
+
+  TABLE_LIST vtmd_tl;
+  vtmd_tl.init_one_table(
+    DB_WITH_LEN(about),
+    XSTRING_WITH_LEN(vtmd_new_name),
+    vtmd_new_name,
+    TL_WRITE_ONLY);
+  vtmd_tl.mdl_request.set_type(MDL_EXCLUSIVE);
+  mysql_ha_rm_tables(thd, &vtmd_tl);
+  if (lock_table_names(thd, &vtmd_tl, 0, thd->variables.lock_wait_timeout, 0))
+    return true;
+  tdc_remove_table(thd, TDC_RT_REMOVE_ALL, new_db, vtmd_new_name, false);
+
+  bool rc= mysql_rename_table(
+    hton,
+    new_db, vtmd_new_name,
+    new_db, vtmd_name,
+    NO_FK_CHECKS);
+
+  if (!rc)
+    query_cache_invalidate3(thd, &vtmd_tl, 0);
+
+  return rc;
+}
+
+/**
+  Compose an archive table name from a table name and the statement start
+  time: "<table_name>_YYYYMMDD_HHMMSS_" followed by second_part padded to
+  six digits.
+
+  @param thd            session whose query_start_TIME() supplies the time
+  @param table_name     name to use as the prefix
+  @param[out] new_name  buffer receiving the result
+  @param new_name_size  size of new_name, passed to my_snprintf as the limit
+*/
+void
+VTMD_table::archive_name(
+  THD* thd,
+  const char* table_name,
+  char* new_name,
+  size_t new_name_size)
+{
+  const MYSQL_TIME now= thd->query_start_TIME();
+  /*
+    second_part is wider than int, so it is passed as unsigned long and
+    formatted with the 'l' modifier; the other members are printed as before.
+  */
+  my_snprintf(new_name, new_name_size, "%s_%04d%02d%02d_%02d%02d%02d_%06lu",
+              table_name, now.year, now.month, now.day, now.hour, now.minute,
+              now.second, static_cast<unsigned long>(now.second_part));
+}
+
+/**
+  Look up the archive name recorded in the VTMD table for about's
+  versioning conditions.
+
+  Opens the VTMD table, temporarily points the name resolution context of
+  thd->lex->select_lex at it, builds the versioning condition, and copies
+  FLD_ARCHIVE_NAME of the first record that passes the condition into out.
+  The context, the table map and the log table are restored/closed on every
+  path past open().
+
+  @param thd       current session
+  @param[out] out  receives the archive name of the matching record
+  @return true on error (the int error code is converted to bool)
+*/
+bool
+VTMD_table::find_archive_name(THD *thd, String &out)
+{
+  READ_RECORD info;
+  int error;
+  SQL_SELECT *select= NULL;
+  COND *conds= NULL;
+  List<TABLE_LIST> dummy;
+  SELECT_LEX &select_lex= thd->lex->select_lex;
+
+  Local_da local_da(thd, ER_VERS_VTMD_ERROR);
+  if (open(thd, local_da))
+    return true;
+
+  Name_resolution_context &ctx= thd->lex->select_lex.context;
+  TABLE_LIST *table_list= ctx.table_list;
+  TABLE_LIST *first_name_resolution_table= ctx.first_name_resolution_table;
+  table_map map = vtmd.table->map;
+  ctx.table_list= &vtmd;
+  ctx.first_name_resolution_table= &vtmd;
+  vtmd.table->map= 1;
+
+  vtmd.vers_conditions= about.vers_conditions;
+  if ((error= vers_setup_select(thd, &vtmd, &conds, &select_lex)) ||
+      (error= setup_conds(thd, &vtmd, dummy, &conds)))
+    goto err;
+
+  select= make_select(vtmd.table, 0, 0, conds, NULL, 0, &error);
+  if (error)
+    goto loc_err;
+
+  error= init_read_record(&info, thd, vtmd.table, select, NULL,
+                          1 /* use_record_cache */, true /* print_error */,
+                          false /* disable_rr_cache */);
+  if (error)
+    goto loc_err;
+
+  while (!(error= info.read_record()) && !thd->killed && !thd->is_error())
+  {
+    if (!select || select->skip_record(thd) > 0)
+    {
+      vtmd.table->field[FLD_ARCHIVE_NAME]->val_str(&out);
+      break;
+    }
+  }
+
+  // A negative code means the scan ended without a matching record.
+  if (error < 0)
+    my_error(ER_NO_SUCH_TABLE, MYF(0), about.db, about.alias);
+
+loc_err:
+  end_read_record(&info);
+err:
+  delete select;
+  ctx.table_list= table_list;
+  ctx.first_name_resolution_table= first_name_resolution_table;
+  vtmd.table->map= map;
+  close_log_table(thd, &open_tables_backup);
+  DBUG_ASSERT(!error || local_da.is_error());
+  return error;
+}
+
+static
+bool
+get_vtmd_tables(THD *thd, const char 
*db, + size_t db_length, Dynamic_array<LEX_CSTRING *> &table_names) +{ + LOOKUP_FIELD_VALUES lookup_field_values= { + {db, db_length}, {C_STRING_WITH_LEN("%_vtmd")}, false, true}; + + int res= make_table_name_list(thd, &table_names, thd->lex, &lookup_field_values, + &lookup_field_values.db_value); + + return res; +} + +bool +VTMD_table::get_archive_tables(THD *thd, const char *db, size_t db_length, + Dynamic_array<String> &result) +{ + Dynamic_array<LEX_CSTRING *> vtmd_tables; + if (get_vtmd_tables(thd, db, db_length, vtmd_tables)) + return true; + + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + for (uint i= 0; i < vtmd_tables.elements(); i++) + { + LEX_CSTRING table_name= *vtmd_tables.at(i); + + Open_tables_backup open_tables_backup; + TABLE_LIST table_list; + table_list.init_one_table(db, db_length, LEX_STRING_WITH_LEN(table_name), + table_name.str, TL_READ); + + TABLE *table= open_log_table(thd, &table_list, &open_tables_backup); + if (!table || !table->vers_vtmd()) + { + if (table) + close_log_table(thd, &open_tables_backup); + else + { + if (local_da.is_error() && local_da.sql_errno() == ER_NOT_LOG_TABLE) + local_da.reset_diagnostics_area(); + else + return true; + } + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_VERS_VTMD_ERROR, + "Table `%s.%s` is not a VTMD table", + db, table_name.str); + continue; + } + + READ_RECORD read_record; + int error= 0; + SQL_SELECT *sql_select= make_select(table, 0, 0, NULL, NULL, 0, &error); + if (error) + { + close_log_table(thd, &open_tables_backup); + return true; + } + error= init_read_record(&read_record, thd, table, sql_select, NULL, 1, 1, false); + if (error) + { + delete sql_select; + close_log_table(thd, &open_tables_backup); + return true; + } + + while (!(error= read_record.read_record())) + { + Field *field= table->field[FLD_ARCHIVE_NAME]; + if (field->is_null()) + continue; + + String archive_name; + field->val_str(&archive_name); + archive_name.set_ascii(strmake_root(thd->mem_root, 
archive_name.c_ptr(), + archive_name.length()), + archive_name.length()); + result.push(archive_name); + } + // check for EOF + if (!thd->is_error()) + error= 0; + + end_read_record(&read_record); + delete sql_select; + close_log_table(thd, &open_tables_backup); + } + + return false; +} + +bool VTMD_table::setup_select(THD* thd) +{ + SString archive_name; + if (find_archive_name(thd, archive_name)) + return true; + + if (archive_name.length() == 0) + return false; + + about.table_name= (char *) thd->memdup(archive_name.c_ptr_safe(), archive_name.length() + 1); + about.table_name_length= archive_name.length(); + DBUG_ASSERT(!about.mdl_request.ticket); + about.mdl_request.init(MDL_key::TABLE, about.db, about.table_name, + about.mdl_request.type, about.mdl_request.duration); + about.vers_force_alias= true; + // Since we modified SELECT_LEX::table_list, we need to invalidate current SP + if (thd->spcont) + { + DBUG_ASSERT(thd->spcont->m_sp); + thd->spcont->m_sp->set_sp_cache_version(ULONG_MAX); + } + return false; +} diff --git a/sql/vtmd.h b/sql/vtmd.h new file mode 100644 index 00000000000..e6f968d7739 --- /dev/null +++ b/sql/vtmd.h @@ -0,0 +1,183 @@ +#ifndef VTMD_INCLUDED +#define VTMD_INCLUDED + +#include "unireg.h" +#include <mysqld_error.h> +#include "my_sys.h" +#include "table.h" + +#include "vers_utils.h" + +class key_buf_t +{ + uchar* buf; + + key_buf_t(const key_buf_t&); // disabled + key_buf_t& operator= (const key_buf_t&); // disabled + +public: + key_buf_t() : buf(NULL) + {} + + ~key_buf_t() + { + if (buf) + my_free(buf); + } + + bool allocate(size_t alloc_size) + { + DBUG_ASSERT(!buf); + buf= static_cast<uchar *>(my_malloc(alloc_size, MYF(0))); + if (!buf) + { + my_message(ER_VERS_VTMD_ERROR, "failed to allocate key buffer", MYF(0)); + return true; + } + return false; + } + + operator uchar* () + { + DBUG_ASSERT(buf); + return reinterpret_cast<uchar *>(buf); + } +}; + +class THD; + +class VTMD_table +{ + Open_tables_backup open_tables_backup; + 
+protected: + TABLE_LIST vtmd; + TABLE_LIST &about; + SString_t vtmd_name; + +private: + VTMD_table(const VTMD_table&); // prohibit copying references + +public: + enum { + FLD_START= 0, + FLD_END, + FLD_NAME, + FLD_ARCHIVE_NAME, + FLD_COL_RENAMES, + FIELD_COUNT + }; + + enum { + IDX_TRX_END= 0, + IDX_ARCHIVE_NAME + }; + + VTMD_table(TABLE_LIST &_about) : + about(_about) + { + vtmd.table= NULL; + } + + bool create(THD *thd); + bool find_record(ulonglong sys_trx_end, bool &found); + bool open(THD *thd, Local_da &local_da, bool *created= NULL); + bool update(THD *thd, const char* archive_name= NULL); + bool setup_select(THD *thd); + + static void archive_name(THD *thd, const char *table_name, char *new_name, size_t new_name_size); + void archive_name(THD *thd, char *new_name, size_t new_name_size) + { + archive_name(thd, about.table_name, new_name, new_name_size); + } + + bool find_archive_name(THD *thd, String &out); + static bool get_archive_tables(THD *thd, const char *db, size_t db_length, + Dynamic_array<String> &result); +}; + +class VTMD_exists : public VTMD_table +{ +protected: + handlerton *hton; + +public: + bool exists; + +public: + VTMD_exists(TABLE_LIST &_about) : + VTMD_table(_about), + hton(NULL), + exists(false) + {} + + bool check_exists(THD *thd); // returns error status +}; + +class VTMD_rename : public VTMD_exists +{ + SString_t vtmd_new_name; + +public: + VTMD_rename(TABLE_LIST &_about) : + VTMD_exists(_about) + {} + + bool try_rename(THD *thd, LString new_db, LString new_alias, const char* archive_name= NULL); + bool revert_rename(THD *thd, LString new_db); + +private: + bool move_archives(THD *thd, LString &new_db); + bool move_table(THD *thd, SString_fs &table_name, LString &new_db); +}; + +class VTMD_drop : public VTMD_exists +{ + char archive_name_[NAME_CHAR_LEN]; + +public: + VTMD_drop(TABLE_LIST &_about) : + VTMD_exists(_about) + { + *archive_name_= 0; + } + + const char* archive_name(THD *thd) + { + VTMD_table::archive_name(thd, 
archive_name_, sizeof(archive_name_)); + return archive_name_; + } + + const char* archive_name() const + { + DBUG_ASSERT(*archive_name_); + return archive_name_; + } + + bool update(THD *thd) + { + DBUG_ASSERT(*archive_name_); + return VTMD_exists::update(thd, archive_name_); + } +}; + + +inline +bool +VTMD_exists::check_exists(THD *thd) +{ + if (about.vers_vtmd_name(vtmd_name)) + return true; + + exists= ha_table_exists(thd, about.db, vtmd_name, &hton); + + if (exists && !hton) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` handlerton empty!", MYF(0), + about.db, vtmd_name.ptr()); + return true; + } + return false; +} + +#endif // VTMD_INCLUDED diff --git a/sql/vtq.h b/sql/vtq.h new file mode 100644 index 00000000000..dcea8734b57 --- /dev/null +++ b/sql/vtq.h @@ -0,0 +1,47 @@ +#ifndef VTQ_INCLUDED +#define VTQ_INCLUDED + +/* Copyright (c) 2016, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + + +/** + VTQ stands for 'versioning transaction query': InnoDB system table that holds + transaction IDs, their corresponding times and other transaction-related + data which is used for transaction order resolution. When versioned table + marks its records lifetime with transaction IDs, VTQ is used to get their + actual timestamps. 
*/
+
+/**
+  Names one value of a VTQ record. Field_vers_trx_id::get_date() passes
+  VTQ_COMMIT_TS to the handlerton's vers_query_trx_id() to ask for the
+  commit timestamp of a transaction id. The enumerators after VTQ_ALL
+  appear to correspond one-to-one to the members of vtq_record_t -- confirm.
+*/
+enum vtq_field_t
+{
+  VTQ_ALL = 0,    // presumably "the whole record" -- confirm with the engine
+  VTQ_TRX_ID,
+  VTQ_COMMIT_ID,
+  VTQ_BEGIN_TS,
+  VTQ_COMMIT_TS,
+  VTQ_ISO_LEVEL
+};
+/**
+  In-memory form of one VTQ record: two 64-bit ids, two timeval timestamps
+  and a one-byte iso_level.
+*/
+struct vtq_record_t
+{
+  ulonglong trx_id;     // presumably the id of the transaction -- confirm
+  ulonglong commit_id;  // presumably its commit-order id -- confirm
+  timeval begin_ts;     // presumably the transaction start time
+  timeval commit_ts;    // presumably the transaction commit time
+  uchar iso_level;      // presumably the isolation level; encoding not shown here
+};
+
+#endif /* VTQ_INCLUDED */ |