diff options
Diffstat (limited to 'sql')
80 files changed, 8163 insertions, 373 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 0f67032bcbe..0697b53e414 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -141,6 +141,7 @@ SET (SQL_SOURCE sql_type.cc item_windowfunc.cc sql_window.cc sql_cte.cc + item_vers.cc sql_sequence.cc sql_sequence.h ha_sequence.h sql_tvc.cc sql_tvc.h ${WSREP_SOURCES} @@ -149,6 +150,7 @@ SET (SQL_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc ${GEN_SOURCES} ${MYSYS_LIBWRAP_SOURCE} + vtmd.cc ) IF (CMAKE_SYSTEM_NAME MATCHES "Linux" OR diff --git a/sql/field.cc b/sql/field.cc index 51562dd4198..dff8fa489dc 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1987,6 +1987,48 @@ bool Field_num::get_date(MYSQL_TIME *ltime,ulonglong fuzzydate) } +bool Field_vers_trx_id::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate, ulonglong trx_id) +{ + ASSERT_COLUMN_MARKED_FOR_READ; + DBUG_ASSERT(ltime); + if (!table || !table->s) + return true; + DBUG_ASSERT(table->versioned_by_engine() || + (table->versioned() && table->s->table_category == TABLE_CATEGORY_TEMPORARY)); + if (!trx_id) + return true; + + THD *thd= get_thd(); + DBUG_ASSERT(thd); + if (trx_id == ULONGLONG_MAX) + { + thd->variables.time_zone->gmt_sec_to_TIME(ltime, TIMESTAMP_MAX_VALUE); + ltime->second_part= TIME_MAX_SECOND_PART; + return false; + } + if (cached == trx_id) + { + *ltime= cache; + return false; + } + + TR_table trt(thd); + bool found= trt.query(trx_id); + if (found) + { + trt[TR_table::FLD_COMMIT_TS]->get_date(&cache, fuzzydate); + *ltime= cache; + cached= trx_id; + return false; + } + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_VERS_NO_TRX_ID, ER_THD(thd, ER_VERS_NO_TRX_ID), + trx_id); + return true; +} + + Field_str::Field_str(uchar *ptr_arg,uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, const LEX_CSTRING *field_name_arg, @@ -2179,7 +2221,9 @@ Field *Field::make_new_field(MEM_ROOT *root, TABLE *new_table, */ tmp->unireg_check= Field::NONE; tmp->flags&= (NOT_NULL_FLAG | BLOB_FLAG | UNSIGNED_FLAG | - ZEROFILL_FLAG | BINARY_FLAG | ENUM_FLAG | SET_FLAG); + ZEROFILL_FLAG | BINARY_FLAG | ENUM_FLAG | SET_FLAG | + VERS_SYS_START_FLAG | VERS_SYS_END_FLAG | + VERS_UPDATE_UNVERSIONED_FLAG | HIDDEN_FLAG); tmp->reset_fields(); return tmp; } @@ -4295,6 +4339,26 @@ void Field_longlong::sql_type(String &res) const add_zerofill_and_unsigned(res); } +void Field_longlong::set_max() +{ + ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; + set_notnull(); + int8store(ptr, unsigned_flag ? ULONGLONG_MAX : LONGLONG_MAX); +} + +bool Field_longlong::is_max() +{ + ASSERT_COLUMN_MARKED_FOR_READ; + if (unsigned_flag) + { + ulonglong j; + j= uint8korr(ptr); + return j == ULONGLONG_MAX; + } + longlong j; + j= sint8korr(ptr); + return j == LONGLONG_MAX; +} /* Floating-point numbers @@ -5307,6 +5371,27 @@ void Field_timestampf::store_TIME(my_time_t timestamp, ulong sec_part) my_timestamp_to_binary(&tm, ptr, dec); } +void Field_timestampf::set_max() +{ + DBUG_ENTER("Field_timestampf::set_max"); + ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; + DBUG_ASSERT(dec == TIME_SECOND_PART_DIGITS); + + set_notnull(); + mi_int4store(ptr, TIMESTAMP_MAX_VALUE); + mi_int3store(ptr + 4, TIME_MAX_SECOND_PART); + + DBUG_VOID_RETURN; +} + +bool Field_timestampf::is_max() +{ + DBUG_ENTER("Field_timestampf::is_max"); + ASSERT_COLUMN_MARKED_FOR_READ; + + DBUG_RETURN(mi_sint4korr(ptr) == TIMESTAMP_MAX_VALUE && + mi_sint3korr(ptr + 4) == TIME_MAX_SECOND_PART); +} my_time_t Field_timestampf::get_timestamp(const uchar *pos, ulong *sec_part) const @@ -10307,7 +10392,8 @@ Field *make_field(TABLE_SHARE *share, Field::geometry_type geom_type, uint srid, Field::utype unireg_check, TYPELIB *interval, - const LEX_CSTRING *field_name) + const LEX_CSTRING *field_name, + uint32 flags) { uchar *UNINIT_VAR(bit_ptr); uchar UNINIT_VAR(bit_offset); @@ -10492,11 +10578,22 @@ Field *make_field(TABLE_SHARE *share, f_is_zerofill(pack_flag) != 0, f_is_dec(pack_flag) == 0); case MYSQL_TYPE_LONGLONG: - return new (mem_root) - Field_longlong(ptr,field_length,null_pos,null_bit, - unireg_check, field_name, - f_is_zerofill(pack_flag) != 0, - f_is_dec(pack_flag) == 0); + if (flags & (VERS_SYS_START_FLAG|VERS_SYS_END_FLAG)) + { + return new (mem_root) + Field_vers_trx_id(ptr, field_length, null_pos, null_bit, + unireg_check, field_name, + f_is_zerofill(pack_flag) != 0, + f_is_dec(pack_flag) == 0); + } + else + { + return new (mem_root) + Field_longlong(ptr,field_length,null_pos,null_bit, + unireg_check, field_name, + f_is_zerofill(pack_flag) != 0, + f_is_dec(pack_flag) == 0); + } case MYSQL_TYPE_TIMESTAMP: { uint dec= field_length > MAX_DATETIME_WIDTH ? @@ -10573,6 +10670,11 @@ Field *make_field(TABLE_SHARE *share, return 0; } +bool Field_vers_trx_id::test_if_equality_guarantees_uniqueness(const Item* item) const +{ + return item->type() == Item::DATE_ITEM; +} + /** Create a field suitable for create of table. */ @@ -10594,6 +10696,7 @@ Column_definition::Column_definition(THD *thd, Field *old_field, option_list= old_field->option_list; pack_flag= 0; compression_method_ptr= 0; + versioning= VERSIONING_NOT_SET; if (orig_field) { diff --git a/sql/field.h b/sql/field.h index eb6510bc9a4..93541695353 100644 --- a/sql/field.h +++ b/sql/field.h @@ -673,6 +673,14 @@ public: static void operator delete(void *ptr, MEM_ROOT *mem_root) { DBUG_ASSERT(0); } + /** + Used by System Versioning. + */ + virtual void set_max() + { DBUG_ASSERT(0); } + virtual bool is_max() + { DBUG_ASSERT(0); return false; } + uchar *ptr; // Position to field in record /** Byte where the @c NULL bit is stored inside a record. If this Field is a @@ -1000,6 +1008,13 @@ public: } bool set_explicit_default(Item *value); + virtual my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const + { DBUG_ASSERT(0); return 0; } + my_time_t get_timestamp(ulong *sec_part) const + { + return get_timestamp(ptr, sec_part); + } + /** Evaluates the @c UPDATE default function, if one exists, and stores the result in the record buffer. If no such function exists for the column, @@ -1439,6 +1454,21 @@ public: FIELD_FLAGS_COLUMN_FORMAT; } + bool vers_sys_field() const + { + return flags & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG); + } + + bool vers_update_unversioned() const + { + return flags & VERS_UPDATE_UNVERSIONED_FLAG; + } + + virtual bool vers_trx_id() const + { + return false; + } + /* Validate a non-null field value stored in the given record according to the current thread settings, e.g. sql_mode. @@ -2146,6 +2176,57 @@ public: { return unpack_int64(to, from, from_end); } + + void set_max(); + bool is_max(); +}; + + +class Field_vers_trx_id :public Field_longlong { + MYSQL_TIME cache; + ulonglong cached; +public: + Field_vers_trx_id(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, + uchar null_bit_arg, enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, bool zero_arg, + bool unsigned_arg) + : Field_longlong(ptr_arg, len_arg, null_ptr_arg, null_bit_arg, + unireg_check_arg, field_name_arg, zero_arg, + unsigned_arg), + cached(0) + {} + enum_field_types real_type() const { return MYSQL_TYPE_LONGLONG; } + enum_field_types type() const { return MYSQL_TYPE_LONGLONG;} + uint size_of() const { return sizeof(*this); } + bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate, ulonglong trx_id); + bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate) + { + return get_date(ltime, fuzzydate, (ulonglong) val_int()); + } + bool test_if_equality_guarantees_uniqueness(const Item *item) const; + bool can_optimize_keypart_ref(const Item_bool_func *cond, + const Item *item) const + { + return true; + } + + bool can_optimize_group_min_max(const Item_bool_func *cond, + const Item *const_item) const + { + return true; + } + bool can_optimize_range(const Item_bool_func *cond, + const Item *item, + bool is_eq_func) const + { + return true; + } + /* cmp_type() cannot be TIME_RESULT, because we want to compare this field against + integers. But in all other cases we treat it as TIME_RESULT! */ + bool vers_trx_id() const + { + return true; + } }; @@ -2432,7 +2513,7 @@ public: return res; } /* Get TIMESTAMP field value as seconds since begging of Unix Epoch */ - virtual my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const; + my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const; my_time_t get_timestamp(ulong *sec_part) const { return get_timestamp(ptr, sec_part); @@ -2561,8 +2642,14 @@ public: { return memcmp(a_ptr, b_ptr, pack_length()); } + void set_max(); + bool is_max(); void store_TIME(my_time_t timestamp, ulong sec_part); my_time_t get_timestamp(const uchar *pos, ulong *sec_part) const; + my_time_t get_timestamp(ulong *sec_part) const + { + return get_timestamp(ptr, sec_part); + } uint size_of() const { return sizeof(*this); } }; @@ -3982,7 +4069,8 @@ Field *make_field(TABLE_SHARE *share, MEM_ROOT *mem_root, CHARSET_INFO *cs, Field::geometry_type geom_type, uint srid, Field::utype unireg_check, - TYPELIB *interval, const LEX_CSTRING *field_name); + TYPELIB *interval, const LEX_CSTRING *field_name, + uint32 flags); /* Create field class for CREATE TABLE @@ -4036,6 +4124,12 @@ class Column_definition: public Sql_alloc, public: LEX_CSTRING field_name; LEX_CSTRING comment; // Comment for field + enum enum_column_versioning + { + VERSIONING_NOT_SET, + WITH_VERSIONING, + WITHOUT_VERSIONING + }; Item *on_update; // ON UPDATE NOW() /* At various stages in execution this can be length of field in bytes or @@ -4068,6 +4162,8 @@ public: *default_value, // Default value *check_constraint; // Check constraint + enum_column_versioning versioning; + Column_definition() :Type_handler_hybrid_field_type(&type_handler_null), compression_method_ptr(0), @@ -4077,10 +4173,12 @@ public: interval(0), charset(&my_charset_bin), srid(0), geom_type(Field::GEOM_GEOMETRY), option_list(NULL), pack_flag(0), - vcol_info(0), default_value(0), check_constraint(0) + vcol_info(0), default_value(0), check_constraint(0), + versioning(VERSIONING_NOT_SET) { interval_list.empty(); } + Column_definition(THD *thd, Field *field, Field *orig_field); void set_attributes(const Lex_field_type_st &type, CHARSET_INFO *cs); void create_length_to_internal_length_null() @@ -4207,7 +4305,7 @@ public: (uint32)length, null_pos, null_bit, pack_flag, type_handler(), charset, geom_type, srid, unireg_check, interval, - field_name_arg); + field_name_arg, flags); } Field *make_field(TABLE_SHARE *share, MEM_ROOT *mem_root, const LEX_CSTRING *field_name_arg) const @@ -4550,4 +4648,19 @@ bool check_expression(Virtual_column_info *vcol, LEX_CSTRING *name, #define f_bit_as_char(x) ((x) & FIELDFLAG_TREAT_BIT_AS_CHAR) #define f_is_hex_escape(x) ((x) & FIELDFLAG_HEX_ESCAPE) +inline +ulonglong TABLE::vers_end_id() const +{ + DBUG_ASSERT(versioned_by_engine()); + return static_cast<ulonglong>(vers_end_field()->val_int()); +} + +inline +ulonglong TABLE::vers_start_id() const +{ + DBUG_ASSERT(versioned_by_engine()); + return static_cast<ulonglong>(vers_start_field()->val_int()); +} + + #endif /* FIELD_INCLUDED */ diff --git a/sql/gen_lex_token.cc b/sql/gen_lex_token.cc index 4b5e0746eac..b36df5e94be 100644 --- a/sql/gen_lex_token.cc +++ b/sql/gen_lex_token.cc @@ -130,6 +130,8 @@ void compute_tokens() set_token(WITH_CUBE_SYM, "WITH CUBE"); set_token(WITH_ROLLUP_SYM, "WITH ROLLUP"); + set_token(WITH_SYSTEM_SYM, "WITH SYSTEM"); + set_token(FOR_SYSTEM_TIME_SYM, "FOR SYSTEM_TIME"); set_token(VALUES_IN_SYM, "VALUES IN"); set_token(VALUES_LESS_SYM, "VALUES LESS"); set_token(NOT2_SYM, "!"); diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 2c0d313fd2b..454ff573624 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -155,9 +155,6 @@ static int partition_initialize(void *p) bool Partition_share::init(uint num_parts) { DBUG_ENTER("Partition_share::init"); - mysql_mutex_init(key_partition_auto_inc_mutex, - &auto_inc_mutex, - MY_MUTEX_INIT_FAST); auto_inc_initialized= false; partition_name_hash_initialized= false; next_auto_inc_val= 0; @@ -1285,12 +1282,12 @@ int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, (modelled after mi_check_print_msg) TODO: move this into the handler, or rewrite mysql_admin_table. */ -static bool print_admin_msg(THD* thd, uint len, +bool print_admin_msg(THD* thd, uint len, const char* msg_type, const char* db_name, String &table_name, const char* op_name, const char *fmt, ...) ATTRIBUTE_FORMAT(printf, 7, 8); -static bool print_admin_msg(THD* thd, uint len, +bool print_admin_msg(THD* thd, uint len, const char* msg_type, const char* db_name, String &table_name, const char* op_name, const char *fmt, ...) @@ -4393,6 +4390,15 @@ int ha_partition::update_row(const uchar *old_data, const uchar *new_data) if (error) goto exit; + if (m_part_info->part_type == VERSIONING_PARTITION) + { + uint sub_factor= m_part_info->num_subparts ? m_part_info->num_subparts : 1; + DBUG_ASSERT(m_tot_parts == m_part_info->num_parts * sub_factor); + uint lpart_id= new_part_id / sub_factor; + // lpart_id is VERSIONING partition because new_part_id != old_part_id + m_part_info->vers_update_stats(thd, lpart_id); + } + tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */ error= m_file[old_part_id]->ha_delete_row(old_data); reenable_binlog(thd); @@ -5840,6 +5846,22 @@ int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen) } +int ha_partition::index_read_last_map(uchar *buf, + const uchar *key, + key_part_map keypart_map) +{ + DBUG_ENTER("ha_partition::index_read_last_map"); + + m_ordered= true; // Safety measure + end_range= NULL; + m_index_scan_type= partition_index_read_last; + m_start_key.key= key; + m_start_key.keypart_map= keypart_map; + m_start_key.flag= HA_READ_PREFIX_LAST; + DBUG_RETURN(common_index_read(buf, true)); +} + + /* Read next record when performing index scan backwards diff --git a/sql/ha_partition.h b/sql/ha_partition.h index a13d9bd7618..0b9f1dc3953 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -79,6 +79,8 @@ struct st_partition_ft_info }; +extern PSI_mutex_key key_partition_auto_inc_mutex; + /** Partition specific Handler_share. */ @@ -96,24 +98,86 @@ public: HASH partition_name_hash; /** Storage for each partitions Handler_share */ Parts_share_refs partitions_share_refs; - Partition_share() {} + Partition_share() + : auto_inc_initialized(false), + next_auto_inc_val(0), + partition_name_hash_initialized(false), + partition_names(NULL) + { + mysql_mutex_init(key_partition_auto_inc_mutex, + &auto_inc_mutex, + MY_MUTEX_INIT_FAST); + } + ~Partition_share() { - DBUG_ENTER("Partition_share::~Partition_share"); mysql_mutex_destroy(&auto_inc_mutex); + if (partition_names) + { + my_free(partition_names); + } if (partition_name_hash_initialized) + { my_hash_free(&partition_name_hash); - DBUG_VOID_RETURN; + } } + bool init(uint num_parts); - void lock_auto_inc() + + /** + Release reserved auto increment values not used. + @param thd Thread. + @param table_share Table Share + @param next_insert_id Next insert id (first non used auto inc value). + @param max_reserved End of reserved auto inc range. + */ + void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share, + const ulonglong next_insert_id, + const ulonglong max_reserved); + + /** lock mutex protecting auto increment value next_auto_inc_val. */ + inline void lock_auto_inc() { mysql_mutex_lock(&auto_inc_mutex); } - void unlock_auto_inc() + /** unlock mutex protecting auto increment value next_auto_inc_val. */ + inline void unlock_auto_inc() { mysql_mutex_unlock(&auto_inc_mutex); } + /** + Populate partition_name_hash with partition and subpartition names + from part_info. + @param part_info Partition info containing all partitions metadata. + + @return Operation status. + @retval false Success. + @retval true Failure. + */ + bool populate_partition_name_hash(partition_info *part_info); + /** Get partition name. + + @param part_id Partition id (for subpartitioned table only subpartition + names will be returned.) + + @return partition name or NULL if error. + */ + const char *get_partition_name(size_t part_id) const; +private: + const uchar **partition_names; + /** + Insert [sub]partition name into partition_name_hash + @param name Partition name. + @param part_id Partition id. + @param is_subpart True if subpartition else partition. + + @return Operation status. + @retval false Success. + @retval true Failure. + */ + bool insert_partition_name_in_hash(const char *name, + uint part_id, + bool is_subpart); }; typedef struct st_partition_key_multi_range @@ -679,6 +743,10 @@ public: virtual int index_last(uchar * buf); virtual int index_next_same(uchar * buf, const uchar * key, uint keylen); + int index_read_last_map(uchar *buf, + const uchar *key, + key_part_map keypart_map); + /* read_first_row is virtual method but is only implemented by handler.cc, no storage engine has implemented it so neither @@ -1406,6 +1474,31 @@ public: return h; } + virtual ha_rows part_records(void *_part_elem) + { + partition_element *part_elem= reinterpret_cast<partition_element *>(_part_elem); + DBUG_ASSERT(m_part_info); + uint32 sub_factor= m_part_info->num_subparts ? m_part_info->num_subparts : 1; + uint32 part_id= part_elem->id * sub_factor; + uint32 part_id_end= part_id + sub_factor; + DBUG_ASSERT(part_id_end <= m_tot_parts); + ha_rows part_recs= 0; + for (; part_id < part_id_end; ++part_id) + { + handler *file= m_file[part_id]; + DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id)); + file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + part_recs+= file->stats.records; + } + return part_recs; + } + + virtual handler* part_handler(uint32 part_id) + { + DBUG_ASSERT(part_id < m_tot_parts); + return m_file[part_id]; + } + friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); friend int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); }; diff --git a/sql/ha_sequence.cc b/sql/ha_sequence.cc index 93f6f32d473..4afa2168b8d 100644 --- a/sql/ha_sequence.cc +++ b/sql/ha_sequence.cc @@ -259,7 +259,8 @@ int ha_sequence::write_row(uchar *buf) sequence->copy(&tmp_seq); rows_changed++; /* We have to do the logging while we hold the sequence mutex */ - error= binlog_log_row(table, 0, buf, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, 0, buf, log_func); row_already_logged= 1; } diff --git a/sql/handler.cc b/sql/handler.cc index 4e059e0e56c..bb52f6bb211 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1414,6 +1414,39 @@ int ha_commit_trans(THD *thd, bool all) goto err; } +#if 1 // FIXME: This should be done in ha_prepare(). + if (rw_trans) + { + ulonglong trx_start_id= 0, trx_end_id= 0; + for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next()) + { + if (ha_info->ht()->prepare_commit_versioned) + { + trx_end_id= ha_info->ht()->prepare_commit_versioned(thd, &trx_start_id); + if (trx_end_id) + break; // FIXME: use a common ID for cross-engine transactions + } + } + + if (trx_end_id) + { + if (!use_transaction_registry) + { + my_error(ER_VERS_TRT_IS_DISABLED, MYF(0)); + goto err; + } + DBUG_ASSERT(trx_start_id); + TR_table trt(thd, true); + if (trt.update(trx_start_id, trx_end_id)) + goto err; + // Here, the call will not commit inside InnoDB. It is only working + // around closing thd->transaction.stmt open by TR_table::open(). + if (all) + commit_one_phase_2(thd, false, &thd->transaction.stmt, false); + } + } +#endif + if (trans->no_2pc || (rw_ha_count <= 1)) { error= ha_commit_one_phase(thd, all); @@ -3023,6 +3056,25 @@ int handler::update_auto_increment() enum enum_check_fields save_count_cuted_fields; DBUG_ENTER("handler::update_auto_increment"); + // ALTER TABLE ... ADD COLUMN ... AUTO_INCREMENT + if (thd->lex->sql_command == SQLCOM_ALTER_TABLE) + { + if (table->versioned()) + { + Field *end= table->vers_end_field(); + DBUG_ASSERT(end); + bitmap_set_bit(table->read_set, end->field_index); + if (!end->is_max()) + { + if (!table->next_number_field->real_maybe_null()) + DBUG_RETURN(HA_ERR_UNSUPPORTED); + table->next_number_field->set_null(); + DBUG_RETURN(0); + } + table->next_number_field->set_notnull(); + } + } + /* next_insert_id is a "cursor" into the reserved interval, it may go greater than the interval, but not smaller. @@ -3145,7 +3197,7 @@ int handler::update_auto_increment() /* Store field without warning (Warning will be printed by insert) */ save_count_cuted_fields= thd->count_cuted_fields; thd->count_cuted_fields= CHECK_FIELD_IGNORE; - tmp= table->next_number_field->store((longlong) nr, TRUE); + tmp= table->next_number_field->store((longlong)nr, TRUE); thd->count_cuted_fields= save_count_cuted_fields; if (unlikely(tmp)) // Out of range value in store @@ -5723,8 +5775,10 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat) 1 Row needs to be logged */ -inline bool handler::check_table_binlog_row_based(bool binlog_row) +bool handler::check_table_binlog_row_based(bool binlog_row) { + if (table->versioned_by_engine()) + return false; if (unlikely((table->in_use->variables.sql_log_bin_off))) return 0; /* Called by partitioning engine */ if (unlikely((!check_table_binlog_row_based_done))) @@ -5873,10 +5927,10 @@ static int write_locked_table_maps(THD *thd) static int check_wsrep_max_ws_rows(); -static int binlog_log_row_internal(TABLE* table, - const uchar *before_record, - const uchar *after_record, - Log_func *log_func) +int binlog_log_row(TABLE* table, + const uchar *before_record, + const uchar *after_record, + Log_func *log_func) { bool error= 0; THD *const thd= table->in_use; @@ -5911,16 +5965,6 @@ static int binlog_log_row_internal(TABLE* table, return error ? HA_ERR_RBR_LOGGING_FAILED : 0; } -int binlog_log_row(TABLE* table, - const uchar *before_record, - const uchar *after_record, - Log_func *log_func) -{ - if (!table->file->check_table_binlog_row_based(1)) - return 0; - return binlog_log_row_internal(table, before_record, after_record, log_func); -} - int handler::ha_external_lock(THD *thd, int lock_type) { @@ -6070,7 +6114,8 @@ int handler::ha_write_row(uchar *buf) if (likely(!error) && !row_already_logged) { rows_changed++; - error= binlog_log_row(table, 0, buf, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, 0, buf, log_func); } DEBUG_SYNC_C("ha_write_row_end"); DBUG_RETURN(error); @@ -6102,7 +6147,8 @@ int handler::ha_update_row(const uchar *old_data, const uchar *new_data) if (likely(!error) && !row_already_logged) { rows_changed++; - error= binlog_log_row(table, old_data, new_data, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, old_data, new_data, log_func); } return error; } @@ -6157,7 +6203,8 @@ int handler::ha_delete_row(const uchar *buf) if (likely(!error)) { rows_changed++; - error= binlog_log_row(table, buf, 0, log_func); + if (table->file->check_table_binlog_row_based(1)) + error= binlog_log_row(table, buf, 0, log_func); } return error; } @@ -6690,3 +6737,604 @@ int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info) mysql_mutex_unlock(&LOCK_global_index_stats); DBUG_RETURN(res); } + +bool Vers_parse_info::is_trx_start(const char *name) const +{ + DBUG_ASSERT(name); + return as_row.start && as_row.start == LString_i(name); +} +bool Vers_parse_info::is_trx_end(const char *name) const +{ + DBUG_ASSERT(name); + return as_row.end && as_row.end == LString_i(name); +} +bool Vers_parse_info::is_trx_start(const Create_field &f) const +{ + return f.flags & VERS_SYS_START_FLAG; +} +bool Vers_parse_info::is_trx_end(const Create_field &f) const +{ + return f.flags & VERS_SYS_END_FLAG; +} + +static Create_field *vers_init_sys_field(THD *thd, const char *field_name, + int flags, bool integer_fields) +{ + Create_field *f= new (thd->mem_root) Create_field(); + if (!f) + return NULL; + + memset(f, 0, sizeof(*f)); + f->field_name.str= field_name; + f->field_name.length= strlen(field_name); + f->charset= system_charset_info; + f->flags= flags | HIDDEN_FLAG; + if (integer_fields) + { + f->set_handler(&type_handler_longlong); + f->flags|= UNSIGNED_FLAG; + f->length= MY_INT64_NUM_DECIMAL_DIGITS - 1; + } + else + { + f->set_handler(&type_handler_timestamp2); + f->length= MAX_DATETIME_PRECISION; + } + + if (f->check(thd)) + return NULL; + + return f; +} + +static bool vers_create_sys_field(THD *thd, const char *field_name, + Alter_info *alter_info, int flags, + bool integer_fields) +{ + Create_field *f= vers_init_sys_field(thd, field_name, flags, integer_fields); + if (!f) + return true; + + alter_info->flags|= Alter_info::ALTER_ADD_COLUMN; + alter_info->create_list.push_back(f); + + return false; +} + +static bool vers_change_sys_field(THD *thd, const char *field_name, + Alter_info *alter_info, int flags, + bool integer_fields, const char *change) +{ + Create_field *f= vers_init_sys_field(thd, field_name, flags, integer_fields); + if (!f) + return true; + + f->change.str= change; + f->change.length= strlen(change); + + alter_info->flags|= Alter_info::ALTER_CHANGE_COLUMN; + alter_info->create_list.push_back(f); + + return false; +} + +bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info, + bool integer_fields) +{ + // If user specified some of these he must specify the others too. Do nothing. + if (as_row.start || as_row.end || system_time.start || system_time.end) + return false; + + alter_info->flags|= Alter_info::ALTER_ADD_COLUMN; + + static const LString sys_trx_start= "sys_trx_start"; + static const LString sys_trx_end= "sys_trx_end"; + + system_time= start_end_t(sys_trx_start, sys_trx_end); + as_row= system_time; + + return vers_create_sys_field(thd, sys_trx_start, alter_info, + VERS_SYS_START_FLAG, + integer_fields) || + vers_create_sys_field(thd, sys_trx_end, alter_info, + VERS_SYS_END_FLAG, + integer_fields); +} + +bool Vers_parse_info::check_and_fix_implicit( + THD *thd, + Alter_info *alter_info, + HA_CREATE_INFO *create_info, + const char* table_name) +{ + DBUG_ASSERT(!without_system_versioning); + + SELECT_LEX &slex= thd->lex->select_lex; + int vers_tables= 0; + bool from_select= slex.item_list.elements ? true : false; + + if (from_select) + { + for (TABLE_LIST *table= slex.table_list.first; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + vers_tables++; + } + } + + // CREATE ... SELECT: if at least one table in SELECT is versioned, + // then created table will be versioned. + if (thd->variables.vers_force) + { + with_system_versioning= true; + create_info->options|= HA_VERSIONED_TABLE; + } + + // Possibly override default storage engine to match one used in source table. + if (from_select && with_system_versioning && + !(create_info->used_fields & HA_CREATE_USED_ENGINE)) + { + List_iterator_fast<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (is_trx_start(*f) || is_trx_end(*f)) + { + create_info->db_type= f->field->orig_table->file->ht; + break; + } + } + } + + if (!need_check()) + return false; + + if (!versioned_fields && unversioned_fields && !with_system_versioning) + { + // All is correct but this table is not versioned. + create_info->options&= ~HA_VERSIONED_TABLE; + return false; + } + + if ((system_time.start || system_time.end || as_row.start || as_row.end) && + !with_system_versioning) + { + my_error(ER_MISSING, MYF(0), table_name, "WITH SYSTEM VERSIONING"); + return true; + } + + TABLE *orig_table= NULL; + List_iterator<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (is_trx_start(*f)) + { + if (!as_row.start) // not inited in CREATE ... SELECT + { + DBUG_ASSERT(vers_tables > 0); + if (orig_table && orig_table != f->field->orig_table) + { + err_different_tables: + my_error(ER_VERS_DIFFERENT_TABLES, MYF(0), table_name); + return true; + } + orig_table= f->field->orig_table; + as_row.start= f->field_name; + system_time.start= as_row.start; + } + continue; + } + if (is_trx_end(*f)) + { + if (!as_row.end) + { + DBUG_ASSERT(vers_tables > 0); + if (orig_table && orig_table != f->field->orig_table) + { + goto err_different_tables; + } + orig_table= f->field->orig_table; + as_row.end= f->field_name; + system_time.end= as_row.end; + } + continue; + } + + if ((f->versioning == Column_definition::VERSIONING_NOT_SET && + !with_system_versioning) || + f->versioning == Column_definition::WITHOUT_VERSIONING) + { + f->flags|= VERS_UPDATE_UNVERSIONED_FLAG; + } + } + + bool integer_fields= ha_check_storage_engine_flag(create_info->db_type, + HTON_NATIVE_SYS_VERSIONING); + +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (partition_info *info= thd->work_part_info) + { + if (!(create_info->used_fields & HA_CREATE_USED_ENGINE) && + info->partitions.elements) + { + partition_element *element= + static_cast<partition_element *>(info->partitions.elem(0)); + handlerton *hton= element->engine_type; + if (hton && ha_check_storage_engine_flag(hton, HTON_NATIVE_SYS_VERSIONING)) + { + integer_fields= true; + } + } + } +#endif + + if (fix_implicit(thd, alter_info, integer_fields)) + return true; + + int plain_cols= 0; // column doesn't have WITH or WITHOUT SYSTEM VERSIONING + int vers_cols= 0; // column has WITH SYSTEM VERSIONING + it.rewind(); + while (const Create_field *f= it++) + { + if (is_trx_start(*f) || is_trx_end(*f)) + continue; + + if (f->versioning == Column_definition::VERSIONING_NOT_SET) + plain_cols++; + else if (f->versioning == Column_definition::WITH_VERSIONING) + vers_cols++; + } + + bool table_with_system_versioning= + as_row.start || as_row.end || system_time.start || system_time.end; + + if (!thd->lex->tmp_table() && + // CREATE from SELECT (Create_fields are not yet added) + !from_select && + vers_cols == 0 && + (plain_cols == 0 || !table_with_system_versioning)) + { + my_error(ER_VERS_NO_COLS_DEFINED, MYF(0), table_name); + return true; + } + + return check_with_conditions(table_name) || + check_generated_type(table_name, alter_info, integer_fields); +} + +static bool add_field_to_drop_list(THD *thd, Alter_info *alter_info, + Field *field) +{ + DBUG_ASSERT(field); + DBUG_ASSERT(field->field_name.str); + alter_info->flags|= Alter_info::ALTER_DROP_COLUMN; + Alter_drop *ad= new (thd->mem_root) + Alter_drop(Alter_drop::COLUMN, field->field_name.str, false); + return !ad || alter_info->drop_list.push_back(ad, thd->mem_root); +} + +static bool is_dropping_primary_key(Alter_info *alter_info) +{ + List_iterator_fast<Alter_drop> it(alter_info->drop_list); + while (Alter_drop *ad= it++) + { + if (ad->type == Alter_drop::KEY && + !my_strcasecmp(system_charset_info, ad->name, primary_key_name)) + return true; + } + return false; +} + +static bool is_adding_primary_key(Alter_info *alter_info) +{ + List_iterator_fast<Key> it(alter_info->key_list); + while (Key *key= it++) + { + if (key->type == Key::PRIMARY) + return true; + } + return false; +} + +bool Vers_parse_info::check_and_fix_alter(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, + TABLE *table) +{ + TABLE_SHARE *share= table->s; + bool integer_fields= ha_check_storage_engine_flag(create_info->db_type, + HTON_NATIVE_SYS_VERSIONING); + const char *table_name= share->table_name.str; + + if (!need_check() && !share->versioned) + return false; + + if (without_system_versioning) + { + if (!share->versioned) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name); + return true; + } + + if (!(share->vers_start_field()->flags & HIDDEN_FLAG)) + { + my_error(ER_VERS_SYS_FIELD_NOT_HIDDEN, MYF(0), + share->vers_start_field()->field_name.str); + return true; + } + if (!(share->vers_end_field()->flags & HIDDEN_FLAG)) + { + my_error(ER_VERS_SYS_FIELD_NOT_HIDDEN, MYF(0), + share->vers_end_field()->field_name.str); + return true; + } + + if (add_field_to_drop_list(thd, alter_info, share->vers_start_field()) || + add_field_to_drop_list(thd, alter_info, share->vers_end_field())) + return true; + + if (share->primary_key != MAX_KEY && !is_adding_primary_key(alter_info) && + !is_dropping_primary_key(alter_info)) + { + alter_info->flags|= Alter_info::ALTER_DROP_INDEX; + Alter_drop *ad= new (thd->mem_root) + Alter_drop(Alter_drop::KEY, primary_key_name, false); + if (!ad || alter_info->drop_list.push_back(ad, thd->mem_root)) + return true; + + alter_info->flags|= Alter_info::ALTER_ADD_INDEX; + LEX_CSTRING key_name= {NULL, 0}; + DDL_options_st options; + options.init(); + Key *pk= new (thd->mem_root) + Key(Key::PRIMARY, &key_name, HA_KEY_ALG_UNDEF, false, options); + if (!pk) + return true; + + st_key &key= table->key_info[share->primary_key]; + for (st_key_part_info *it= key.key_part, + *end= it + key.user_defined_key_parts; + it != end; ++it) + { + if (it->field->vers_sys_field()) + continue; + + Key_part_spec *key_part_spec= new (thd->mem_root) + Key_part_spec(&it->field->field_name, it->length); + if (!key_part_spec || + pk->columns.push_back(key_part_spec, thd->mem_root)) + return true; + } + + alter_info->key_list.push_back(pk); + } + + return false; + } + + { + List_iterator_fast<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (f->change.length && + f->flags & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG)) + { + if (thd->mdl_context.upgrade_shared_lock( + table->mdl_ticket, MDL_EXCLUSIVE, + thd->variables.lock_wait_timeout)) + return true; + if (table->file->info(HA_STATUS_VARIABLE | HA_STATUS_TIME)) + return true; + if (0 < table->file->records()) + { + my_error(ER_VERS_GENERATED_ALWAYS_NOT_EMPTY, MYF(0), f->change.str); + return true; + } + break; + } + } + } + + if ((versioned_fields || unversioned_fields) && !share->versioned) + { + my_error(ER_VERS_NOT_VERSIONED, MYF(0), table_name); + return true; + } + + if (share->versioned) + { + // copy info from existing table + create_info->options|= HA_VERSIONED_TABLE; + + DBUG_ASSERT(share->vers_start_field() && share->vers_end_field()); + LString start(share->vers_start_field()->field_name); + LString end(share->vers_end_field()->field_name); + DBUG_ASSERT(start.ptr() && end.ptr()); + + as_row= start_end_t(start, end); + system_time= as_row; + + if (alter_info->create_list.elements) + { + List_iterator_fast<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (f->versioning == Column_definition::WITHOUT_VERSIONING) + f->flags|= VERS_UPDATE_UNVERSIONED_FLAG; + + if (f->change.str && (start == f->change || end == f->change)) + { + my_error(ER_VERS_ALTER_SYSTEM_FIELD, MYF(0), f->change.str); + return true; + } + } + } + + if (alter_info->drop_list.elements) + { + bool done_start= false; + bool done_end= false; + List_iterator<Alter_drop> it(alter_info->drop_list); + while (Alter_drop *d= it++) + { + const char *name= d->name; + Field *f= NULL; + if (!done_start && is_trx_start(name)) + { + f= share->vers_start_field(); + done_start= true; + } + else if (!done_end && is_trx_end(name)) + { + f= share->vers_end_field(); + done_end= true; + } + else + continue; + if (f->flags & HIDDEN_FLAG) + { + my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0), d->type_name(), name); + return true; + } + + if (vers_change_sys_field(thd, name, alter_info, + f->flags & + (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG), + integer_fields, name)) + { + return true; + } + + it.remove(); + + if (done_start && done_end) + break; + } + } + + return false; + } + + return fix_implicit(thd, alter_info, integer_fields) || + (with_system_versioning && + (check_with_conditions(table_name) || + check_generated_type(table_name, alter_info, integer_fields))); +} + +bool +Vers_parse_info::fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info, + TABLE_LIST &src_table, TABLE_LIST &table) +{ + List_iterator<Create_field> it(alter_info.create_list); + Create_field *f, *f_start=NULL, *f_end= NULL; + + DBUG_ASSERT(alter_info.create_list.elements > 2); + + if (create_info.tmp_table()) + { + int remove= 2; + while (remove && (f= it++)) + { + if (f->flags & (VERS_SYS_START_FLAG|VERS_SYS_END_FLAG)) + { + it.remove(); + remove--; + } + } + DBUG_ASSERT(remove == 0); + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, + "System versioning is stripped from temporary `%s.%s`", + table.db, table.table_name); + return false; + } + + while ((f= it++)) + { + if (f->flags & VERS_SYS_START_FLAG) + { + f_start= f; + if (f_end) + break; + } + else if (f->flags & VERS_SYS_END_FLAG) + { + f_end= f; + if (f_start) + break; + } + } + + if (!f_start || !f_end) + { + my_error(ER_MISSING, MYF(0), src_table.table_name, + f_start ? "AS ROW END" : "AS ROW START"); + return true; + } + + as_row= start_end_t(f_start->field_name, f_end->field_name); + system_time= as_row; + + create_info.options|= HA_VERSIONED_TABLE; + return false; +} + + +bool Vers_parse_info::check_with_conditions(const char *table_name) const +{ + if (!as_row.start || !as_row.end) + { + my_error(ER_MISSING, MYF(0), table_name, + as_row.start ? "AS ROW END" : "AS ROW START"); + return true; + } + + if (!system_time.start || !system_time.end) + { + my_error(ER_MISSING, MYF(0), table_name, "PERIOD FOR SYSTEM_TIME"); + return true; + } + + if (as_row.start != system_time.start || as_row.end != system_time.end) + { + my_error(ER_VERS_PERIOD_COLUMNS, MYF(0), as_row.start.str, as_row.end.str); + return true; + } + + return false; +} + +bool Vers_parse_info::check_generated_type(const char *table_name, + Alter_info *alter_info, + bool integer_fields) const +{ + List_iterator<Create_field> it(alter_info->create_list); + while (Create_field *f= it++) + { + if (is_trx_start(*f) || is_trx_end(*f)) + { + if (integer_fields) + { + if (f->type_handler() != &type_handler_longlong || !(f->flags & UNSIGNED_FLAG) || + f->length != (MY_INT64_NUM_DECIMAL_DIGITS - 1)) + { + my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), f->field_name.str, + "BIGINT(20) UNSIGNED", table_name); + return true; + } + } + else + { + if (!(f->type_handler() == &type_handler_datetime2 || + f->type_handler() == &type_handler_timestamp2) || + f->length != MAX_DATETIME_FULL_WIDTH) + { + my_error(ER_VERS_FIELD_WRONG_TYPE, MYF(0), f->field_name.str, + "TIMESTAMP(6)", table_name); + return true; + } + } + } + } + + return false; +} diff --git a/sql/handler.h b/sql/handler.h index a176b45d8db..6fbb61c902c 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -34,6 +34,7 @@ #include "structs.h" /* SHOW_COMP_OPTION */ #include "sql_array.h" /* Dynamic_array<> */ #include "mdl.h" +#include "vers_string.h" #include "sql_analyze_stmt.h" // for Exec_time_tracker @@ -412,6 +413,8 @@ enum enum_alter_inplace_result { #define HA_LEX_CREATE_TMP_TABLE 1U #define HA_CREATE_TMP_ALTER 8U #define HA_LEX_CREATE_SEQUENCE 16U +#define HA_VERSIONED_TABLE 32U +#define HA_VTMD 64U #define HA_MAX_REC_LENGTH 65535 @@ -1400,6 +1403,16 @@ struct handlerton */ int (*discover_table_structure)(handlerton *hton, THD* thd, TABLE_SHARE *share, HA_CREATE_INFO *info); + + /* + System Versioning + */ + /** Determine if system-versioned data was modified by the transaction. + @param[in,out] thd current session + @param[out] trx_id transaction start ID + @return transaction commit ID + @retval 0 if no system-versioned data was affected by the transaction */ + ulonglong (*prepare_commit_versioned)(THD *thd, ulonglong *trx_id); }; @@ -1447,6 +1460,7 @@ handlerton *ha_default_tmp_handlerton(THD *thd); */ #define HTON_NO_BINLOG_ROW_OPT (1 << 9) #define HTON_SUPPORTS_EXTENDED_KEYS (1 <<10) //supports extended keys +#define HTON_NATIVE_SYS_VERSIONING (1 << 11) //Engine supports System Versioning // MySQL compatibility. Unused. #define HTON_SUPPORTS_FOREIGN_KEYS (1 << 0) //Foreign key constraint supported. @@ -1696,6 +1710,86 @@ struct Schema_specification_st } }; +class Create_field; + +struct Vers_parse_info +{ + Vers_parse_info() : + with_system_versioning(false), + without_system_versioning(false), + versioned_fields(false), + unversioned_fields(false) + {} + + struct start_end_t + { + start_end_t() + {} + start_end_t(LEX_CSTRING _start, LEX_CSTRING _end) : + start(_start), + end(_end) {} + LString_i start; + LString_i end; + }; + + start_end_t system_time; + start_end_t as_row; + + void set_period_for_system_time(LString start, LString end) + { + system_time.start= start; + system_time.end= end; + } + +private: + bool is_trx_start(const char *name) const; + bool is_trx_end(const char *name) const; + bool is_trx_start(const Create_field &f) const; + bool is_trx_end(const Create_field &f) const; + bool fix_implicit(THD *thd, Alter_info *alter_info, bool integer_fields); + bool need_check() const + { + return + versioned_fields || + unversioned_fields || + with_system_versioning || + without_system_versioning || + system_time.start || + system_time.end || + as_row.start || + as_row.end; + } + bool check_with_conditions(const char *table_name) const; + bool check_generated_type(const char *table_name, Alter_info *alter_info, + bool integer_fields) const; + +public: + bool check_and_fix_implicit(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, + const char *table_name); + bool check_and_fix_alter(THD *thd, Alter_info *alter_info, + HA_CREATE_INFO *create_info, TABLE *table); + bool fix_create_like(Alter_info &alter_info, HA_CREATE_INFO &create_info, + TABLE_LIST &src_table, TABLE_LIST &table); + + /** User has added 'WITH SYSTEM VERSIONING' to table definition */ + bool with_system_versioning : 1; + + /** Use has added 'WITHOUT SYSTEM VERSIONING' to ALTER TABLE */ + bool without_system_versioning : 1; + + /** + At least one field was specified 'WITH SYSTEM VERSIONING'. Useful for + error handling. + */ + bool versioned_fields : 1; + + /** + At least one field was specified 'WITHOUT SYSTEM VERSIONING'. Useful for + error handling. + */ + bool unversioned_fields : 1; +}; /** A helper struct for table DDL statements, e.g.: @@ -1771,6 +1865,8 @@ struct Table_scope_and_contents_source_st bool table_was_deleted; sequence_definition *seq_create_info; + Vers_parse_info vers_info; + void init() { bzero(this, sizeof(*this)); @@ -1781,6 +1877,16 @@ struct Table_scope_and_contents_source_st db_type= tmp_table() ? ha_default_tmp_handlerton(thd) : ha_default_handlerton(thd); } + + bool versioned() const + { + return options & HA_VERSIONED_TABLE; + } + + bool vtmd() const + { + return options & HA_VTMD; + } }; @@ -2047,6 +2153,8 @@ public: static const HA_ALTER_FLAGS ALTER_DROP_CHECK_CONSTRAINT= 1ULL << 40; + static const HA_ALTER_FLAGS ALTER_DROP_HISTORICAL = 1ULL << 41; + /** Create options (like MAX_ROWS) for the new version of table. @@ -3295,6 +3403,18 @@ protected: virtual int index_last(uchar * buf) { return HA_ERR_WRONG_COMMAND; } virtual int index_next_same(uchar *buf, const uchar *key, uint keylen); + /** + @brief + The following functions works like index_read, but it find the last + row with the current key value or prefix. + @returns @see index_read_map(). + */ + virtual int index_read_last_map(uchar * buf, const uchar * key, + key_part_map keypart_map) + { + uint key_len= calculate_key_len(table, active_index, key, keypart_map); + return index_read_last(buf, key, key_len); + } virtual int close(void)=0; inline void update_rows_read() { @@ -3376,7 +3496,7 @@ public: virtual int pre_ft_end() { return 0; } virtual FT_INFO *ft_init_ext(uint flags, uint inx,String *key) { return NULL; } -private: +public: virtual int ft_read(uchar *buf) { return HA_ERR_WRONG_COMMAND; } virtual int rnd_next(uchar *buf)=0; virtual int rnd_pos(uchar * buf, uchar *pos)=0; @@ -4150,8 +4270,8 @@ protected: virtual int delete_table(const char *name); public: - inline bool check_table_binlog_row_based(bool binlog_row); -private: + bool check_table_binlog_row_based(bool binlog_row); + /* Cache result to avoid extra calls */ inline void mark_trx_read_write() { @@ -4161,6 +4281,8 @@ private: mark_trx_read_write_internal(); } } + +private: void mark_trx_read_write_internal(); bool check_table_binlog_row_based_internal(bool binlog_row); @@ -4322,6 +4444,11 @@ protected: virtual int index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag) { return HA_ERR_WRONG_COMMAND; } + virtual int index_read_last(uchar * buf, const uchar * key, uint key_len) + { + my_errno= HA_ERR_WRONG_COMMAND; + return HA_ERR_WRONG_COMMAND; + } friend class ha_partition; friend class ha_sequence; public: @@ -4445,6 +4572,15 @@ public: */ virtual int find_unique_row(uchar *record, uint unique_ref) { return -1; /*unsupported */} + + bool native_versioned() const + { DBUG_ASSERT(ht); return partition_ht()->flags & HTON_NATIVE_SYS_VERSIONING; } + virtual ha_rows part_records(void *_part_elem) + { DBUG_ASSERT(0); return false; } + virtual handler* part_handler(uint32 part_id) + { DBUG_ASSERT(0); return NULL; } + virtual void update_partition(uint part_id) + {} protected: Handler_share *get_ha_share_ptr(); void set_ha_share_ptr(Handler_share *arg_ha_share); diff --git a/sql/item.cc b/sql/item.cc index 4c2a91dcee7..cde9170b990 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -6049,6 +6049,7 @@ bool Item_field::fix_fields(THD *thd, Item **reference) expression to 'reference', i.e. it substitute that expression instead of this Item_field */ + DBUG_ASSERT(context); if ((from_field= find_field_in_tables(thd, this, context->first_name_resolution_table, context->last_name_resolution_table, @@ -6903,7 +6904,7 @@ int Item_int::save_in_field(Field *field, bool no_conversions) Item *Item_int::clone_item(THD *thd) { - return new (thd->mem_root) Item_int(thd, name.str, value, max_length); + return new (thd->mem_root) Item_int(thd, name.str, value, max_length, unsigned_flag); } @@ -7233,6 +7234,26 @@ bool Item_temporal_literal::eq(const Item *item, bool binary_cmp) const &((Item_temporal_literal *) item)->cached_time); } +bool Item_temporal_literal::operator<(const MYSQL_TIME <ime) const +{ + if (my_time_compare(&cached_time, <ime) < 0) + return true; + return false; +} + +bool Item_temporal_literal::operator>(const MYSQL_TIME <ime) const +{ + if (my_time_compare(&cached_time, <ime) > 0) + return true; + return false; +} + +bool Item_temporal_literal::operator==(const MYSQL_TIME <ime) const +{ + if (my_time_compare(&cached_time, <ime) == 0) + return true; + return false; +} void Item_date_literal::print(String *str, enum_query_type query_type) { @@ -10592,6 +10613,35 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) return find_matching_grouping_field(this, sel) != NULL; } +Item *Item_field::vers_transformer(THD *thd, uchar *) +{ + if (!field) + return this; + + if (field->vers_update_unversioned() && context && + field->table->pos_in_table_list && + field->table->pos_in_table_list->vers_conditions) + { + push_warning_printf( + current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_NON_VERSIONED_FIELD_IN_VERSIONED_QUERY, + ER_THD(current_thd, ER_NON_VERSIONED_FIELD_IN_VERSIONED_QUERY), + field_name.str); + + Item *null_item= new (thd->mem_root) Item_null(thd); + if (null_item) + return null_item; + } + + return this; +} + +bool Item_field::vers_trx_id() const +{ + DBUG_ASSERT(field); + return field->vers_trx_id(); +} + void Item::register_in(THD *thd) { next= thd->free_list; diff --git a/sql/item.h b/sql/item.h index fb7edfd3d1a..d178d9f97d6 100644 --- a/sql/item.h +++ b/sql/item.h @@ -301,6 +301,28 @@ public: } }; +class Name_resolution_context_backup +{ + Name_resolution_context &ctx; + TABLE_LIST &table_list; + table_map save_map; + Name_resolution_context_state ctx_state; + +public: + Name_resolution_context_backup(Name_resolution_context &_ctx, TABLE_LIST &_table_list) + : ctx(_ctx), table_list(_table_list), save_map(_table_list.map) + { + ctx_state.save_state(&ctx, &table_list); + ctx.table_list= &table_list; + ctx.first_name_resolution_table= &table_list; + } + ~Name_resolution_context_backup() + { + ctx_state.restore_state(&ctx, &table_list); + table_list.map= save_map; + } +}; + /* This enum is used to report information about monotonicity of function @@ -541,7 +563,6 @@ public: String_copier_for_item(THD *thd): m_thd(thd) { } }; - class Item: public Value_source, public Type_all_attributes { @@ -774,6 +795,10 @@ public: return type_handler()->field_type(); } virtual const Type_handler *type_handler() const= 0; + virtual uint field_flags() const + { + return 0; + } const Type_handler *type_handler_for_comparison() const { return type_handler()->type_handler_for_comparison(); @@ -1742,6 +1767,10 @@ public: virtual Item_field *field_for_view_update() { return 0; } + virtual Item *vers_transformer(THD *thd, uchar *) + { return this; } + virtual bool vers_trx_id() const + { return false; } virtual Item *neg_transformer(THD *thd) { return NULL; } virtual Item *update_value_transformer(THD *thd, uchar *select_arg) { return this; } @@ -2803,6 +2832,10 @@ public: return field->type_handler(); } TYPELIB *get_typelib() const { return field->get_typelib(); } + uint32 field_flags() const + { + return field->flags; + } enum_monotonicity_info get_monotonicity_info() const { return MONOTONIC_STRICT_INCREASING; @@ -2899,6 +2932,8 @@ public: uint32 max_display_length() const { return field->max_display_length(); } Item_field *field_for_view_update() { return this; } int fix_outer_field(THD *thd, Field **field, Item **reference); + virtual Item *vers_transformer(THD *thd, uchar *); + virtual bool vers_trx_id() const; virtual Item *update_value_transformer(THD *thd, uchar *select_arg); Item *derived_field_transformer_for_having(THD *thd, uchar *arg); Item *derived_field_transformer_for_where(THD *thd, uchar *arg); @@ -3463,6 +3498,14 @@ public: name.str= str_arg; name.length= safe_strlen(name.str); fixed= 1; } + Item_int(THD *thd, const char *str_arg,longlong i,uint length, bool flag): + Item_num(thd), value(i) + { + max_length=length; + name.str= str_arg; name.length= safe_strlen(name.str); + fixed= 1; + unsigned_flag= flag; + } Item_int(THD *thd, const char *str_arg, uint length=64); enum Type type() const { return INT_ITEM; } const Type_handler *type_handler() const @@ -3913,10 +3956,10 @@ class Item_return_date_time :public Item_partition_func_safe_string enum_field_types date_time_field_type; public: Item_return_date_time(THD *thd, const char *name_arg, uint length_arg, - enum_field_types field_type_arg): + enum_field_types field_type_arg, uint dec_arg= 0): Item_partition_func_safe_string(thd, name_arg, length_arg, &my_charset_bin), date_time_field_type(field_type_arg) - { decimals= 0; } + { decimals= dec_arg; } const Type_handler *type_handler() const { return Type_handler::get_handler_by_field_type(date_time_field_type); @@ -4157,6 +4200,13 @@ public: { return val_decimal_from_date(decimal_value); } int save_in_field(Field *field, bool no_conversions) { return save_date_in_field(field, no_conversions); } + void set_time(MYSQL_TIME *ltime) + { + cached_time= *ltime; + } + bool operator>(const MYSQL_TIME <ime) const; + bool operator<(const MYSQL_TIME <ime) const; + bool operator==(const MYSQL_TIME <ime) const; }; @@ -4216,7 +4266,7 @@ public: class Item_datetime_literal: public Item_temporal_literal { public: - Item_datetime_literal(THD *thd, MYSQL_TIME *ltime, uint dec_arg): + Item_datetime_literal(THD *thd, MYSQL_TIME *ltime, uint dec_arg= 0): Item_temporal_literal(thd, ltime, dec_arg) { max_length= MAX_DATETIME_WIDTH + (decimals ? decimals + 1 : 0); @@ -4716,6 +4766,12 @@ public: return 0; return cleanup_processor(arg); } + virtual bool vers_trx_id() const + { + DBUG_ASSERT(ref); + DBUG_ASSERT(*ref); + return (*ref)->vers_trx_id(); + } }; @@ -5232,6 +5288,7 @@ public: #include "item_xmlfunc.h" #include "item_jsonfunc.h" #include "item_create.h" +#include "item_vers.h" #endif /** @@ -6172,37 +6229,59 @@ class Item_type_holder: public Item, { protected: TYPELIB *enum_set_typelib; +private: + void init_flags(Item *item) + { + if (item->real_type() == Item::FIELD_ITEM) + { + Item_field *item_field= (Item_field *)item->real_item(); + m_flags|= (item_field->field->flags & + (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG)); + // TODO: additional field flag? + m_vers_trx_id= item_field->field->vers_trx_id(); + } + } public: Item_type_holder(THD *thd, Item *item) :Item(thd, item), Type_handler_hybrid_field_type(item->real_type_handler()), - enum_set_typelib(0) + enum_set_typelib(0), + m_flags(0), + m_vers_trx_id(false) { DBUG_ASSERT(item->fixed); maybe_null= item->maybe_null; + init_flags(item); } Item_type_holder(THD *thd, - const LEX_CSTRING *name_arg, + Item *item, const Type_handler *handler, const Type_all_attributes *attr, bool maybe_null_arg) :Item(thd), Type_handler_hybrid_field_type(handler), Type_geometry_attributes(handler, attr), - enum_set_typelib(attr->get_typelib()) + enum_set_typelib(attr->get_typelib()), + m_flags(0), + m_vers_trx_id(false) { - name= *name_arg; + name= item->name; Type_std_attributes::set(*attr); maybe_null= maybe_null_arg; + init_flags(item); } const Type_handler *type_handler() const { - const Type_handler *handler= Type_handler_hybrid_field_type::type_handler(); + const Type_handler *handler= m_vers_trx_id ? + &type_handler_vers_trx_id : + Type_handler_hybrid_field_type::type_handler(); return handler->type_handler_for_item_field(); } const Type_handler *real_type_handler() const { + if (m_vers_trx_id) + return &type_handler_vers_trx_id; return Type_handler_hybrid_field_type::type_handler(); } @@ -6227,6 +6306,19 @@ public: Type_geometry_attributes::set_geometry_type(type); } Item* get_copy(THD *thd) { return 0; } + +private: + uint m_flags; + bool m_vers_trx_id; +public: + uint32 field_flags() const + { + return m_flags; + } + virtual bool vers_trx_id() const + { + return m_vers_trx_id; + } }; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 191616365fd..d38b0100246 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -127,9 +127,12 @@ Type_handler_hybrid_field_type::aggregate_for_comparison(const char *funcname, many cases. */ set_handler(items[0]->type_handler()->type_handler_for_comparison()); + m_vers_trx_id= items[0]->vers_trx_id(); for (uint i= 1 ; i < nitems ; i++) { unsigned_count+= items[i]->unsigned_flag; + if (!m_vers_trx_id) + m_vers_trx_id= items[i]->vers_trx_id(); if (aggregate_for_comparison(items[i]->type_handler()-> type_handler_for_comparison())) { @@ -421,7 +424,7 @@ void Item_func::convert_const_compared_to_int_field(THD *thd) args[field= 1]->real_item()->type() == FIELD_ITEM) { Item_field *field_item= (Item_field*) (args[field]->real_item()); - if ((field_item->field_type() == MYSQL_TYPE_LONGLONG || + if (((field_item->field_type() == MYSQL_TYPE_LONGLONG && !field_item->vers_trx_id()) || field_item->field_type() == MYSQL_TYPE_YEAR)) convert_const_to_int(thd, field_item, &args[!field]); } @@ -5290,7 +5293,6 @@ bool fix_escape_item(THD *thd, Item *escape_item, String *tmp_str, return FALSE; } - bool Item_func_like::fix_fields(THD *thd, Item **ref) { DBUG_ASSERT(fixed == 0); diff --git a/sql/item_create.cc b/sql/item_create.cc index 5d6d9742c7a..d73a5327d25 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -43,38 +43,6 @@ */ /** - Adapter for native functions with a variable number of arguments. - The main use of this class is to discard the following calls: - <code>foo(expr1 AS name1, expr2 AS name2, ...)</code> - which are syntactically correct (the syntax can refer to a UDF), - but semantically invalid for native functions. -*/ - -class Create_native_func : public Create_func -{ -public: - virtual Item *create_func(THD *thd, LEX_CSTRING *name, - List<Item> *item_list); - - /** - Builder method, with no arguments. - @param thd The current thread - @param name The native function name - @param item_list The function parameters, none of which are named - @return An item representing the function call - */ - virtual Item *create_native(THD *thd, LEX_CSTRING *name, - List<Item> *item_list) = 0; - -protected: - /** Constructor. */ - Create_native_func() {} - /** Destructor. */ - virtual ~Create_native_func() {} -}; - - -/** Adapter for functions that takes exactly zero arguments. */ @@ -6827,12 +6795,6 @@ Create_func_year_week::create_native(THD *thd, LEX_CSTRING *name, } -struct Native_func_registry -{ - LEX_CSTRING name; - Create_func *builder; -}; - #define BUILDER(F) & F::s_singleton #ifdef HAVE_SPATIAL @@ -7218,8 +7180,6 @@ get_native_fct_hash_key(const uchar *buff, size_t *length, int item_create_init() { - Native_func_registry *func; - DBUG_ENTER("item_create_init"); if (my_hash_init(& native_functions_hash, @@ -7232,7 +7192,16 @@ int item_create_init() MYF(0))) DBUG_RETURN(1); - for (func= func_array; func->builder != NULL; func++) + DBUG_RETURN(item_create_append(func_array)); +} + +int item_create_append(Native_func_registry array[]) +{ + Native_func_registry *func; + + DBUG_ENTER("item_create_append"); + + for (func= array; func->builder != NULL; func++) { if (my_hash_insert(& native_functions_hash, (uchar*) func)) DBUG_RETURN(1); diff --git a/sql/item_create.h b/sql/item_create.h index 128a19a1c15..e0beca37082 100644 --- a/sql/item_create.h +++ b/sql/item_create.h @@ -19,6 +19,8 @@ #ifndef ITEM_CREATE_H #define ITEM_CREATE_H +#include "item_func.h" // Cast_target + typedef struct st_udf_func udf_func; /** @@ -67,6 +69,38 @@ protected: /** + Adapter for native functions with a variable number of arguments. + The main use of this class is to discard the following calls: + <code>foo(expr1 AS name1, expr2 AS name2, ...)</code> + which are syntactically correct (the syntax can refer to a UDF), + but semantically invalid for native functions. +*/ + +class Create_native_func : public Create_func +{ +public: + virtual Item *create_func(THD *thd, LEX_CSTRING *name, + List<Item> *item_list); + + /** + Builder method, with no arguments. + @param thd The current thread + @param name The native function name + @param item_list The function parameters, none of which are named + @return An item representing the function call + */ + virtual Item *create_native(THD *thd, LEX_CSTRING *name, + List<Item> *item_list) = 0; + +protected: + /** Constructor. */ + Create_native_func() {} + /** Destructor. */ + virtual ~Create_native_func() {} +}; + + +/** Function builder for qualified functions. This builder is used with functions call using a qualified function name syntax, as in <code>db.func(expr, expr, ...)</code>. @@ -172,7 +206,14 @@ Item *create_temporal_literal(THD *thd, const String *str, type, send_error); } +struct Native_func_registry +{ + LEX_STRING name; + Create_func *builder; +}; + int item_create_init(); +int item_create_append(Native_func_registry array[]); void item_create_cleanup(); Item *create_func_dyncol_create(THD *thd, List<DYNCALL_CREATE_DEF> &list); diff --git a/sql/item_func.h b/sql/item_func.h index 07c6b5bb8d3..f6640c9b719 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -30,6 +30,9 @@ extern "C" /* Bug in BSDI include file */ } #endif +#include "sql_udf.h" // udf_handler +#include "my_decimal.h" // string2my_decimal + class Item_func :public Item_func_or_sum { diff --git a/sql/item_vers.cc b/sql/item_vers.cc new file mode 100644 index 00000000000..8aa1321e396 --- /dev/null +++ b/sql/item_vers.cc @@ -0,0 +1,182 @@ +/* Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + + +/** + @brief + System Versioning items +*/ + +#include "mariadb.h" +#include "sql_priv.h" + +#include "sql_class.h" +#include "tztime.h" +#include "item.h" + +Item_func_vtq_ts::Item_func_vtq_ts(THD *thd, Item* a, TR_table::field_id_t _vtq_field) : + Item_datetimefunc(thd, a), + vtq_field(_vtq_field) +{ + decimals= 6; + null_value= true; + DBUG_ASSERT(arg_count == 1 && args[0]); +} + + +bool +Item_func_vtq_ts::get_date(MYSQL_TIME *res, ulonglong fuzzy_date) +{ + THD *thd= current_thd; // can it differ from constructor's? + DBUG_ASSERT(thd); + DBUG_ASSERT(args[0]); + if (args[0]->result_type() != INT_RESULT) + { + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0), + args[0]->type_handler()->name().ptr(), + func_name()); + return true; + } + ulonglong trx_id= args[0]->val_uint(); + if (trx_id == ULONGLONG_MAX) + { + null_value= false; + thd->variables.time_zone->gmt_sec_to_TIME(res, TIMESTAMP_MAX_VALUE); + res->second_part= TIME_MAX_SECOND_PART; + return false; + } + + TR_table trt(thd); + + null_value= !trt.query(trx_id); + if (null_value) + { + my_error(ER_VERS_NO_TRX_ID, MYF(0), trx_id); + return true; + } + + return trt[vtq_field]->get_date(res, fuzzy_date); +} + + +Item_func_vtq_id::Item_func_vtq_id(THD *thd, Item* a, TR_table::field_id_t _vtq_field, + bool _backwards) : + Item_longlong_func(thd, a), + vtq_field(_vtq_field), + backwards(_backwards) +{ + decimals= 0; + unsigned_flag= 1; + null_value= true; + DBUG_ASSERT(arg_count == 1 && args[0]); +} + +Item_func_vtq_id::Item_func_vtq_id(THD *thd, Item* a, Item* b, TR_table::field_id_t _vtq_field) : + Item_longlong_func(thd, a, b), + vtq_field(_vtq_field), + backwards(false) +{ + decimals= 0; + unsigned_flag= 1; + null_value= true; + DBUG_ASSERT(arg_count == 2 && args[0] && args[1]); +} + +longlong +Item_func_vtq_id::get_by_trx_id(ulonglong trx_id) +{ + THD *thd= current_thd; + DBUG_ASSERT(thd); + + if (trx_id == ULONGLONG_MAX) + { + null_value= true; + return 0; + } + + TR_table trt(thd); + null_value= !trt.query(trx_id); + if (null_value) + return 0; + + return trt[vtq_field]->val_int(); +} + +longlong +Item_func_vtq_id::get_by_commit_ts(MYSQL_TIME &commit_ts, bool backwards) +{ + THD *thd= current_thd; + DBUG_ASSERT(thd); + + TR_table trt(thd); + null_value= !trt.query(commit_ts, backwards); + if (null_value) + return 0; + + return trt[vtq_field]->val_int(); +} + +longlong +Item_func_vtq_id::val_int() +{ + if (args[0]->is_null()) + { + if (arg_count < 2 || vtq_field == TR_table::FLD_TRX_ID) + { + null_value= true; + return 0; + } + return get_by_trx_id(args[1]->val_uint()); + } + else + { + MYSQL_TIME commit_ts; + if (args[0]->get_date(&commit_ts, 0)) + { + null_value= true; + return 0; + } + if (arg_count > 1) + { + backwards= args[1]->val_bool(); + DBUG_ASSERT(arg_count == 2); + } + return get_by_commit_ts(commit_ts, backwards); + } +} + +Item_func_vtq_trx_sees::Item_func_vtq_trx_sees(THD *thd, Item* a, Item* b) : + Item_bool_func(thd, a, b), + accept_eq(false) +{ + null_value= true; + DBUG_ASSERT(arg_count == 2 && args[0] && args[1]); +} + +longlong +Item_func_vtq_trx_sees::val_int() +{ + THD *thd= current_thd; + DBUG_ASSERT(thd); + + DBUG_ASSERT(arg_count > 1); + ulonglong trx_id1= args[0]->val_uint(); + ulonglong trx_id0= args[1]->val_uint(); + bool result= accept_eq; + + TR_table trt(thd); + null_value= trt.query_sees(result, trx_id1, trx_id0); + return result; +} diff --git a/sql/item_vers.h b/sql/item_vers.h new file mode 100644 index 00000000000..39ed4ecda1f --- /dev/null +++ b/sql/item_vers.h @@ -0,0 +1,114 @@ +#ifndef ITEM_VERS_INCLUDED +#define ITEM_VERS_INCLUDED +/* Copyright (c) 2017, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + + +/* System Versioning items */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +class Item_func_vtq_ts: public Item_datetimefunc +{ + TR_table::field_id_t vtq_field; +public: + Item_func_vtq_ts(THD *thd, Item* a, TR_table::field_id_t _vtq_field); + const char *func_name() const + { + if (vtq_field == TR_table::FLD_BEGIN_TS) + { + return "vtq_begin_ts"; + } + return "vtq_commit_ts"; + } + bool get_date(MYSQL_TIME *res, ulonglong fuzzy_date); + Item *get_copy(THD *thd) + { return get_item_copy<Item_func_vtq_ts>(thd, this); } + void fix_length_and_dec() { fix_attributes_datetime(decimals); } +}; + +class Item_func_vtq_id : public Item_longlong_func +{ + TR_table::field_id_t vtq_field; + bool backwards; + + longlong get_by_trx_id(ulonglong trx_id); + longlong get_by_commit_ts(MYSQL_TIME &commit_ts, bool backwards); + +public: + Item_func_vtq_id(THD *thd, Item* a, TR_table::field_id_t _vtq_field, bool _backwards= false); + Item_func_vtq_id(THD *thd, Item* a, Item* b, TR_table::field_id_t _vtq_field); + + const char *func_name() const + { + switch (vtq_field) + { + case TR_table::FLD_TRX_ID: + return "vtq_trx_id"; + case TR_table::FLD_COMMIT_ID: + return "vtq_commit_id"; + case TR_table::FLD_ISO_LEVEL: + return "vtq_iso_level"; + default: + DBUG_ASSERT(0); + } + return NULL; + } + + void fix_length_and_dec() + { + Item_int_func::fix_length_and_dec(); + max_length= 20; + } + + longlong val_int(); + Item *get_copy(THD *thd) + { return get_item_copy<Item_func_vtq_id>(thd, this); } +}; + +class Item_func_vtq_trx_sees : public Item_bool_func +{ +protected: + bool accept_eq; + +public: + Item_func_vtq_trx_sees(THD *thd, Item* a, Item* b); + const char *func_name() const + { + return "vtq_trx_sees"; + } + longlong val_int(); + Item *get_copy(THD *thd) + { return get_item_copy<Item_func_vtq_trx_sees>(thd, this); } +}; + +class Item_func_vtq_trx_sees_eq : + public Item_func_vtq_trx_sees +{ +public: + Item_func_vtq_trx_sees_eq(THD *thd, Item* a, Item* b) : + Item_func_vtq_trx_sees(thd, a, b) + { + accept_eq= true; + } + const char *func_name() const + { + return "vtq_trx_sees_eq"; + } +}; + +#endif /* ITEM_VERS_INCLUDED */ diff --git a/sql/lex.h b/sql/lex.h index a0068069660..d31625292e3 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -459,6 +459,7 @@ static SYMBOL symbols[] = { { "PAGE_CHECKSUM", SYM(PAGE_CHECKSUM_SYM)}, { "PARSER", SYM(PARSER_SYM)}, { "PARSE_VCOL_EXPR", SYM(PARSE_VCOL_EXPR_SYM)}, + { "PERIOD", SYM(PERIOD_SYM)}, { "PARTIAL", SYM(PARTIAL)}, { "PARTITION", SYM(PARTITION_SYM)}, { "PARTITIONING", SYM(PARTITIONING_SYM)}, @@ -628,6 +629,8 @@ static SYMBOL symbols[] = { { "SUSPEND", SYM(SUSPEND_SYM)}, { "SWAPS", SYM(SWAPS_SYM)}, { "SWITCHES", SYM(SWITCHES_SYM)}, + { "SYSTEM", SYM(SYSTEM)}, + { "SYSTEM_TIME", SYM(SYSTEM_TIME_SYM)}, { "TABLE", SYM(TABLE_SYM)}, { "TABLE_NAME", SYM(TABLE_NAME_SYM)}, { "TABLES", SYM(TABLES)}, @@ -693,6 +696,7 @@ static SYMBOL symbols[] = { { "VIA", SYM(VIA_SYM)}, { "VIEW", SYM(VIEW_SYM)}, { "VIRTUAL", SYM(VIRTUAL_SYM)}, + { "VERSIONING", SYM(VERSIONING_SYM)}, { "WAIT", SYM(WAIT_SYM)}, { "WARNINGS", SYM(WARNINGS)}, { "WEEK", SYM(WEEK_SYM)}, @@ -703,6 +707,7 @@ static SYMBOL symbols[] = { { "WINDOW", SYM(WINDOW_SYM)}, { "WITH", SYM(WITH)}, { "WITHIN", SYM(WITHIN)}, + { "WITHOUT", SYM(WITHOUT)}, { "WORK", SYM(WORK_SYM)}, { "WRAPPER", SYM(WRAPPER_SYM)}, { "WRITE", SYM(WRITE_SYM)}, diff --git a/sql/log_event.cc b/sql/log_event.cc index f1ceaec6456..397d1b6bef5 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -44,6 +44,7 @@ #include <strfunc.h> #include "compat56.h" #include "wsrep_mysqld.h" +#include "sql_insert.h" #endif /* MYSQL_CLIENT */ #include <my_bitmap.h> @@ -12504,6 +12505,23 @@ Rows_log_event::write_row(rpl_group_info *rgi, DBUG_RETURN(HA_ERR_GENERIC); // in case if error is not set yet } + // Handle INSERT. + // Set vers fields when replicating from not system-versioned table. + if (m_type == WRITE_ROWS_EVENT_V1 && table->versioned_by_sql()) + { + ulong sec_part; + bitmap_set_bit(table->read_set, table->vers_start_field()->field_index); + // Check whether a row came from unversioned table and fix vers fields. + if (table->vers_start_field()->get_timestamp(&sec_part) == 0 && sec_part == 0) + { + bitmap_set_bit(table->write_set, table->vers_start_field()->field_index); + bitmap_set_bit(table->write_set, table->vers_end_field()->field_index); + thd->set_current_time(); + table->vers_start_field()->set_time(); + table->vers_end_field()->set_max(); + } + } + /* Try to write record. If a corresponding record already exists in the table, we try to change it using ha_update_row() if possible. Otherwise we delete @@ -12824,7 +12842,10 @@ static bool record_compare(TABLE *table) /* Compare fields */ for (Field **ptr=table->field ; *ptr ; ptr++) { - + if (table->versioned() && (*ptr)->vers_sys_field()) + { + continue; + } /** We only compare field contents that are not null. NULL fields (i.e., their null bits) were compared @@ -13019,6 +13040,27 @@ int Rows_log_event::find_row(rpl_group_info *rgi) prepare_record(table, m_width, FALSE); error= unpack_current_row(rgi); + m_vers_from_plain= false; + if (table->versioned()) + { + Field *sys_trx_end= table->vers_end_field(); + DBUG_ASSERT(table->read_set); + bitmap_set_bit(table->read_set, sys_trx_end->field_index); + // check whether master table is unversioned + if (sys_trx_end->val_int() == 0) + { + // sys_trx_start initialized with NULL when came from plain table. + // Set it notnull() because record_compare() count NULLs. + table->vers_start_field()->set_notnull(); + bitmap_set_bit(table->write_set, sys_trx_end->field_index); + // Plain source table may have a PRIMARY KEY. And sys_trx_end is always + // a part of PRIMARY KEY. Set it to max value for engine to find it in + // index. Needed for an UPDATE/DELETE cases. + table->vers_end_field()->set_max(); + m_vers_from_plain= true; + } + } + DBUG_PRINT("info",("looking for the following record")); DBUG_DUMP("record[0]", table->record[0], table->s->reclength); @@ -13400,7 +13442,19 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) if (!error) { m_table->mark_columns_per_binlog_row_image(); - error= m_table->file->ha_delete_row(m_table->record[0]); + if (m_vers_from_plain && m_table->versioned_by_sql()) + { + Field *end= m_table->vers_end_field(); + bitmap_set_bit(m_table->write_set, end->field_index); + store_record(m_table, record[1]); + end->set_time(); + error= m_table->file->ha_update_row(m_table->record[1], + m_table->record[0]); + } + else + { + error= m_table->file->ha_delete_row(m_table->record[0]); + } m_table->default_column_bitmaps(); } if (invoke_triggers && !error && @@ -13657,9 +13711,22 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) memcpy(m_table->write_set->bitmap, m_cols_ai.bitmap, (m_table->write_set->n_bits + 7) / 8); m_table->mark_columns_per_binlog_row_image(); + if (m_vers_from_plain && m_table->versioned_by_sql()) + { + bitmap_set_bit(m_table->write_set, + m_table->vers_start_field()->field_index); + thd->set_current_time(); + m_table->vers_start_field()->set_time(); + } error= m_table->file->ha_update_row(m_table->record[1], m_table->record[0]); if (error == HA_ERR_RECORD_IS_THE_SAME) error= 0; + if (m_vers_from_plain && m_table->versioned_by_sql()) + { + store_record(m_table, record[2]); + error= vers_insert_history_row(m_table); + restore_record(m_table, record[2]); + } m_table->default_column_bitmaps(); if (invoke_triggers && !error && diff --git a/sql/log_event.h b/sql/log_event.h index c8f3241cb3d..2dedd8bbe9a 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -4594,6 +4594,8 @@ protected: uchar *m_extra_row_data; /* Pointer to extra row data if any */ /* If non null, first byte is length */ + bool m_vers_from_plain; + /* helper functions */ @@ -4744,6 +4746,7 @@ public: __attribute__((unused)), const uchar *after_record) { + DBUG_ASSERT(!table->versioned_by_engine()); return thd->binlog_write_row(table, is_transactional, after_record); } #endif @@ -4825,6 +4828,7 @@ public: const uchar *before_record, const uchar *after_record) { + DBUG_ASSERT(!table->versioned_by_engine()); return thd->binlog_update_row(table, is_transactional, before_record, after_record); } @@ -4914,6 +4918,7 @@ public: const uchar *after_record __attribute__((unused))) { + DBUG_ASSERT(!table->versioned_by_engine()); return thd->binlog_delete_row(table, is_transactional, before_record); } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index c7fbf0a594c..ad76d043cc7 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -100,6 +100,8 @@ #include "rpl_handler.h" +#include "transaction.h" + #ifdef HAVE_SYS_PRCTL_H #include <sys/prctl.h> #endif @@ -535,6 +537,8 @@ ulonglong slave_skipped_errors; ulong feature_files_opened_with_delayed_keys= 0, feature_check_constraint= 0; ulonglong denied_connections; my_decimal decimal_zero; +my_bool opt_transaction_registry= 1; +my_bool use_transaction_registry= 1; /* Maximum length of parameter value which can be set through @@ -947,6 +951,9 @@ PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered, key_LOCK_slave_background; PSI_mutex_key key_TABLE_SHARE_LOCK_share; +PSI_mutex_key key_TABLE_SHARE_LOCK_rotation; +PSI_cond_key key_TABLE_SHARE_COND_rotation; + static PSI_mutex_info all_server_mutexes[]= { #ifdef HAVE_MMAP @@ -1008,6 +1015,7 @@ static PSI_mutex_info all_server_mutexes[]= { &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0}, { &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0}, { &key_TABLE_SHARE_LOCK_share, "TABLE_SHARE::LOCK_share", 0}, + { &key_TABLE_SHARE_LOCK_rotation, "TABLE_SHARE::LOCK_rotation", 0}, { &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL}, { &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL}, { &key_LOCK_after_binlog_sync, "LOCK_after_binlog_sync", PSI_FLAG_GLOBAL}, @@ -1028,8 +1036,8 @@ static PSI_mutex_info all_server_mutexes[]= PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock, - key_LOCK_SEQUENCE; - + key_LOCK_SEQUENCE, + key_rwlock_LOCK_vers_stats, key_rwlock_LOCK_stat_serial; static PSI_rwlock_info all_server_rwlocks[]= { @@ -1042,7 +1050,9 @@ static PSI_rwlock_info all_server_rwlocks[]= { &key_rwlock_LOCK_sys_init_slave, "LOCK_sys_init_slave", PSI_FLAG_GLOBAL}, { &key_LOCK_SEQUENCE, "LOCK_SEQUENCE", 0}, { &key_rwlock_LOCK_system_variables_hash, "LOCK_system_variables_hash", PSI_FLAG_GLOBAL}, - { &key_rwlock_query_cache_query_lock, "Query_cache_query::lock", 0} + { &key_rwlock_query_cache_query_lock, "Query_cache_query::lock", 0}, + { &key_rwlock_LOCK_vers_stats, "Vers_field_stats::lock", 0}, + { &key_rwlock_LOCK_stat_serial, "TABLE_SHARE::LOCK_stat_serial", 0} }; #ifdef HAVE_MMAP @@ -1125,7 +1135,8 @@ static PSI_cond_info all_server_conds[]= { &key_COND_slave_background, "COND_slave_background", 0}, { &key_COND_start_thread, "COND_start_thread", PSI_FLAG_GLOBAL}, { &key_COND_wait_gtid, "COND_wait_gtid", 0}, - { &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0} + { &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0}, + { &key_TABLE_SHARE_COND_rotation, "TABLE_SHARE::COND_rotation", 0} }; PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, @@ -6020,6 +6031,36 @@ int mysqld_main(int argc, char **argv) if (Events::init((THD*) 0, opt_noacl || opt_bootstrap)) unireg_abort(1); + if (opt_transaction_registry) + { + use_transaction_registry= true; + if (opt_bootstrap) + { + use_transaction_registry= false; + } + else + { + THD *thd = new THD(0); + thd->thread_stack= (char*) &thd; + thd->store_globals(); + { + TR_table trt(thd); + if (trt.check()) + { + use_transaction_registry= false; + } + } + + trans_commit_stmt(thd); + delete thd; + } + } + else + use_transaction_registry= false; + + if (opt_transaction_registry && !use_transaction_registry) + sql_print_information("Disabled transaction registry."); + if (WSREP_ON) { if (opt_bootstrap) @@ -9810,7 +9851,10 @@ static int get_options(int *argc_ptr, char ***argv_ptr) /* Ensure that some variables are not set higher than needed */ if (thread_cache_size > max_connections) SYSVAR_AUTOSIZE(thread_cache_size, max_connections); - + + if (opt_bootstrap) + global_system_variables.vers_force= 0; + return 0; } diff --git a/sql/mysqld.h b/sql/mysqld.h index 7a430d4f00b..5c389e60e17 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -179,6 +179,44 @@ extern char *opt_backup_history_logname, *opt_backup_progress_logname, *opt_backup_settings_name; extern const char *log_output_str; extern const char *log_backup_output_str; + +/* System Versioning begin */ +enum vers_range_type_t +{ + FOR_SYSTEM_TIME_UNSPECIFIED = 0, + FOR_SYSTEM_TIME_ALL, + FOR_SYSTEM_TIME_AS_OF, + FOR_SYSTEM_TIME_FROM_TO, + FOR_SYSTEM_TIME_BETWEEN, + FOR_SYSTEM_TIME_BEFORE +}; + +struct st_vers_asof_timestamp +{ + ulong type; + MYSQL_TIME ltime; + st_vers_asof_timestamp() : + type(FOR_SYSTEM_TIME_UNSPECIFIED) + {} +}; + +enum vers_hide_enum +{ + VERS_HIDE_AUTO= 0, + VERS_HIDE_IMPLICIT, + VERS_HIDE_FULL, + VERS_HIDE_NEVER +}; + +enum vers_alter_history_enum +{ + VERS_ALTER_HISTORY_ERROR= 0, + VERS_ALTER_HISTORY_KEEP, + VERS_ALTER_HISTORY_SURVIVE, + VERS_ALTER_HISTORY_DROP +}; +/* System Versioning end */ + extern char *mysql_home_ptr, *pidfile_name_ptr; extern MYSQL_PLUGIN_IMPORT char glob_hostname[FN_REFLEN]; extern char mysql_home[FN_REFLEN]; @@ -276,6 +314,8 @@ extern my_bool encrypt_tmp_disk_tables, encrypt_tmp_files; extern ulong encryption_algorithm; extern const char *encryption_algorithm_names[]; extern const char *quoted_string; +extern my_bool opt_transaction_registry; +extern my_bool use_transaction_registry; #ifdef HAVE_PSI_INTERFACE #ifdef HAVE_MMAP @@ -314,13 +354,16 @@ extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state, extern PSI_mutex_key key_TABLE_SHARE_LOCK_share, key_LOCK_stats, key_LOCK_global_user_client_stats, key_LOCK_global_table_stats, - key_LOCK_global_index_stats, key_LOCK_wakeup_ready, key_LOCK_wait_commit; + key_LOCK_global_index_stats, key_LOCK_wakeup_ready, key_LOCK_wait_commit, + key_TABLE_SHARE_LOCK_rotation; extern PSI_mutex_key key_LOCK_gtid_waiting; extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger, key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave, key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock, - key_LOCK_SEQUENCE; + key_LOCK_SEQUENCE, + key_rwlock_LOCK_vers_stats, key_rwlock_LOCK_stat_serial; + #ifdef HAVE_MMAP extern PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool; #endif /* HAVE_MMAP */ @@ -348,6 +391,7 @@ extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_queue, key_COND_rpl_thread_stop, key_COND_rpl_thread_pool, key_COND_parallel_entry, key_COND_group_commit_orderer; extern PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates; +extern PSI_cond_key key_TABLE_SHARE_COND_rotation; extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, key_thread_handle_manager, key_thread_kill_server, key_thread_main, diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 3a13a77ade0..fba449e7ce2 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3455,6 +3455,13 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) free_root(&alloc,MYF(0)); // Return memory & allocator DBUG_RETURN(FALSE); } + + if (part_info->part_type == VERSIONING_PARTITION && + part_info->vers_update_range_constants(thd)) + { + retval= TRUE; + goto end2; + } dbug_tmp_use_all_columns(table, old_sets, table->read_set, table->write_set); @@ -3555,6 +3562,7 @@ all_used: mark_all_partitions_as_used(prune_param.part_info); end: dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets); +end2: thd->no_errors=0; thd->mem_root= range_par->old_root; free_root(&alloc,MYF(0)); // Return memory & allocator @@ -3981,7 +3989,7 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) simply set res= -1 as if the mapper had returned that. TODO: What to do here is defined in WL#4065. */ - if (ppar->arg_stack[0]->part == 0) + if (ppar->arg_stack[0]->part == 0 || ppar->part_info->part_type == VERSIONING_PARTITION) { uint32 i; uint32 store_length_array[MAX_KEY]; @@ -7340,6 +7348,7 @@ SEL_TREE *Item_func_in::get_func_row_mm_tree(RANGE_OPT_PARAM *param, key_col->bring_value(); key_col_info.comparator= row_cmp_item->get_comparator(i); + DBUG_ASSERT(key_col_info.comparator); key_col_info.comparator->store_value(key_col); col_comparators++; diff --git a/sql/opt_range.h b/sql/opt_range.h index a436b0e684d..bd85a12d4a1 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1666,6 +1666,38 @@ class SQL_SELECT :public Sql_alloc { }; +class SQL_SELECT_auto +{ + SQL_SELECT *select; +public: + SQL_SELECT_auto(): select(NULL) + {} + ~SQL_SELECT_auto() + { + delete select; + } + SQL_SELECT_auto& + operator= (SQL_SELECT *_select) + { + select= _select; + return *this; + } + operator SQL_SELECT * () const + { + return select; + } + SQL_SELECT * + operator-> () const + { + return select; + } + operator bool () const + { + return select; + } +}; + + class FT_SELECT: public QUICK_RANGE_SELECT { public: diff --git a/sql/partition_element.h b/sql/partition_element.h index c774994b7f5..da08a15b2ce 100644 --- a/sql/partition_element.h +++ b/sql/partition_element.h @@ -26,7 +26,8 @@ enum partition_type { NOT_A_PARTITION= 0, RANGE_PARTITION, HASH_PARTITION, - LIST_PARTITION + LIST_PARTITION, + VERSIONING_PARTITION }; enum partition_state { @@ -89,8 +90,76 @@ typedef struct p_elem_val struct st_ddl_log_memory_entry; -class partition_element :public Sql_alloc { +/* Used for collecting MIN/MAX stats on sys_trx_end for doing pruning + in SYSTEM_TIME partitiong. */ +class Vers_min_max_stats : public Sql_alloc +{ + static const uint buf_size= 4 + (TIME_SECOND_PART_DIGITS + 1) / 2; + uchar min_buf[buf_size]; + uchar max_buf[buf_size]; + Field_timestampf min_value; + Field_timestampf max_value; + mysql_rwlock_t lock; + +public: + Vers_min_max_stats(const LEX_CSTRING *field_name, TABLE_SHARE *share) : + min_value(min_buf, NULL, 0, Field::NONE, field_name, share, 6), + max_value(max_buf, NULL, 0, Field::NONE, field_name, share, 6) + { + min_value.set_max(); + memset(max_buf, 0, buf_size); + mysql_rwlock_init(key_rwlock_LOCK_vers_stats, &lock); + } + ~Vers_min_max_stats() + { + mysql_rwlock_destroy(&lock); + } + bool update_unguarded(Field *from) + { + return + from->update_min(&min_value, false) + + from->update_max(&max_value, false); + } + bool update(Field *from) + { + mysql_rwlock_wrlock(&lock); + bool res= update_unguarded(from); + mysql_rwlock_unlock(&lock); + return res; + } + my_time_t min_time() + { + mysql_rwlock_rdlock(&lock); + ulong sec_part; + my_time_t res= min_value.get_timestamp(&sec_part); + mysql_rwlock_unlock(&lock); + return res; + } + my_time_t max_time() + { + mysql_rwlock_rdlock(&lock); + ulong sec_part; + my_time_t res= max_value.get_timestamp(&sec_part); + mysql_rwlock_unlock(&lock); + return res; + } +}; + +enum stat_trx_field +{ + STAT_TRX_END= 0 +}; + +class partition_element :public Sql_alloc +{ public: + enum elem_type + { + CONVENTIONAL= 0, + AS_OF_NOW, + VERSIONING + }; + List<partition_element> subpartitions; List<part_elem_value> list_val_list; ha_rows part_max_rows; @@ -109,6 +178,21 @@ public: bool has_null_value; bool signed_flag; // Range value signed bool max_value; // MAXVALUE range + uint32 id; + bool empty; + + // TODO: subclass partition_element by partitioning type to avoid such semantic + // mixup + elem_type type() + { + return (elem_type)(signed_flag << 1 | max_value); + } + + void type(elem_type val) + { + max_value= val & 1; + signed_flag= val & 2; + } partition_element() : part_max_rows(0), part_min_rows(0), range_value(0), @@ -117,9 +201,10 @@ public: data_file_name(NULL), index_file_name(NULL), engine_type(NULL), connect_string(null_clex_str), part_state(PART_NORMAL), nodegroup_id(UNDEF_NODEGROUP), has_null_value(FALSE), - signed_flag(FALSE), max_value(FALSE) - { - } + signed_flag(FALSE), max_value(FALSE), + id(UINT32_MAX), + empty(true) + {} partition_element(partition_element *part_elem) : part_max_rows(part_elem->part_max_rows), part_min_rows(part_elem->part_min_rows), @@ -132,10 +217,20 @@ public: connect_string(null_clex_str), part_state(part_elem->part_state), nodegroup_id(part_elem->nodegroup_id), - has_null_value(FALSE) + has_null_value(FALSE), + id(part_elem->id), + empty(part_elem->empty) + {} + ~partition_element() {} + + part_column_list_val& get_col_val(uint idx) { + DBUG_ASSERT(type() == CONVENTIONAL || list_val_list.elements == 1); + part_elem_value *ev= list_val_list.head(); + DBUG_ASSERT(ev); + DBUG_ASSERT(ev->col_val_array); + return ev->col_val_array[idx]; } - ~partition_element() {} }; #endif /* PARTITION_ELEMENT_INCLUDED */ diff --git a/sql/partition_info.cc b/sql/partition_info.cc index 0dc9a2638eb..f0dc999d677 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -21,6 +21,8 @@ #endif #include "mariadb.h" +#include <my_global.h> +#include <tztime.h> #include "sql_priv.h" // Required to get server definitions for mysql/plugin.h right #include "sql_plugin.h" @@ -30,6 +32,7 @@ #include "sql_parse.h" #include "sql_acl.h" // *_ACL #include "sql_base.h" // fill_record +#include "sql_statistics.h" // vers_stat_end #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" @@ -113,6 +116,19 @@ partition_info *partition_info::get_clone(THD *thd) part_clone->list_val_list.push_back(new_val, mem_root); } } + if (part_type == VERSIONING_PARTITION && vers_info) + { + // clone Vers_part_info; set now_part, hist_part + clone->vers_info= new (mem_root) Vers_part_info(*vers_info); + List_iterator<partition_element> it(clone->partitions); + while ((part= it++)) + { + if (vers_info->now_part && part->id == vers_info->now_part->id) + clone->vers_info->now_part= part; + else if (vers_info->hist_part && part->id == vers_info->hist_part->id) + clone->vers_info->hist_part= part; + } // while ((part= it++)) + } // if (part_type == VERSIONING_PARTITION ... DBUG_RETURN(clone); } @@ -199,6 +215,48 @@ bool partition_info::set_named_partition_bitmap(const char *part_name, @param table_list Table list pointing to table to prune. @return Operation status + @retval false Success + @retval true Failure +*/ +bool partition_info::set_read_partitions(List<char> *partition_names) +{ + DBUG_ENTER("partition_info::set_read_partitions"); + if (!partition_names || !partition_names->elements) + { + DBUG_RETURN(true); + } + + uint num_names= partition_names->elements; + List_iterator<char> partition_names_it(*partition_names); + uint i= 0; + /* + TODO: When adding support for FK in partitioned tables, the referenced + table must probably lock all partitions for read, and also write depending + of ON DELETE/UPDATE. + */ + bitmap_clear_all(&read_partitions); + + /* No check for duplicate names or overlapping partitions/subpartitions. */ + + DBUG_PRINT("info", ("Searching through partition_name_hash")); + do + { + char *part_name= partition_names_it++; + if (add_named_partition(part_name, strlen(part_name))) + DBUG_RETURN(true); + } while (++i < num_names); + DBUG_RETURN(false); +} + + + +/** + Prune away partitions not mentioned in the PARTITION () clause, + if used. + + @param table_list Table list pointing to table to prune. + + @return Operation status @retval true Failure @retval false Success */ @@ -779,6 +837,439 @@ bool partition_info::has_unique_name(partition_element *element) DBUG_RETURN(TRUE); } +bool partition_info::vers_init_info(THD * thd) +{ + part_type= VERSIONING_PARTITION; + list_of_part_fields= TRUE; + column_list= TRUE; + num_columns= 1; + vers_info= new (thd->mem_root) Vers_part_info; + if (!vers_info) + { + mem_alloc_error(sizeof(Vers_part_info)); + return true; + } + return false; +} + +bool partition_info::vers_set_interval(const INTERVAL & i) +{ + if (i.neg || i.second_part) + return true; + + DBUG_ASSERT(vers_info); + + // TODO: INTERVAL conversion to seconds leads to mismatch with calendar intervals (MONTH and YEAR) + vers_info->interval= static_cast<my_time_t>( + i.second + + i.minute * 60 + + i.hour * 60 * 60 + + i.day * 24 * 60 * 60 + + i.month * 30 * 24 * 60 * 60 + + i.year * 365 * 30 * 24 * 60 * 60); + + if (vers_info->interval == 0) + return true; + + return false; +} + +bool partition_info::vers_set_limit(ulonglong limit) +{ + if (limit < 1) + return true; + + DBUG_ASSERT(vers_info); + + vers_info->limit= limit; + return false; +} + +partition_element* +partition_info::vers_part_rotate(THD * thd) +{ + DBUG_ASSERT(table && table->s); + DBUG_ASSERT(vers_info && vers_info->initialized()); + + if (table->s->hist_part_id >= vers_info->now_part->id - 1) + { + DBUG_ASSERT(table->s->hist_part_id == vers_info->now_part->id - 1); + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PART_FULL, + ER_THD(thd, WARN_VERS_PART_FULL), + vers_info->hist_part->partition_name); + return vers_info->hist_part; + } + + table->s->hist_part_id++; + const char* old_part_name= vers_info->hist_part->partition_name; + vers_hist_part(); + + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_NOTE, + WARN_VERS_PART_ROTATION, + ER_THD(thd, WARN_VERS_PART_ROTATION), + old_part_name, + vers_info->hist_part->partition_name); + + return vers_info->hist_part; +} + +bool partition_info::vers_set_expression(THD *thd, partition_element *el, MYSQL_TIME& t) +{ + curr_part_elem= el; + init_column_part(thd); + el->list_val_list.empty(); + el->list_val_list.push_back(curr_list_val, thd->mem_root); + for (uint i= 0; i < num_columns; ++i) + { + part_column_list_val *col_val= add_column_value(thd); + if (el->type() == partition_element::AS_OF_NOW) + { + col_val->max_value= true; + col_val->item_expression= NULL; + col_val->column_value= NULL; + col_val->part_info= this; + col_val->fixed= 1; + continue; + } + Item *item_expression= new (thd->mem_root) Item_datetime_literal(thd, &t); + if (!item_expression) + return true; + /* We initialize col_val with bogus max value to make fix_partition_func() and check_range_constants() happy. + Later in vers_setup_stats() it is initialized with real stat value if there will be any. */ + /* FIXME: TIME_RESULT in col_val is expensive. It should be INT_RESULT + (got to be fixed when InnoDB is supported). */ + init_col_val(col_val, item_expression); + DBUG_ASSERT(item_expression == el->get_col_val(i).item_expression); + } // for (num_columns) + return false; +} + +bool partition_info::vers_setup_expression(THD * thd, uint32 alter_add) +{ + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + + if (!table->versioned_by_sql()) + { + my_error(ER_VERSIONING_REQUIRED, MYF(0), table->s->table_name.str); + return true; + } + + if (alter_add) + { + DBUG_ASSERT(partitions.elements > alter_add + 1); + Vers_min_max_stats** old_array= table->s->stat_trx; + table->s->stat_trx= static_cast<Vers_min_max_stats**>( + alloc_root(&table->s->mem_root, sizeof(void *) * (partitions.elements * num_columns + 1))); + memcpy(table->s->stat_trx, old_array, sizeof(void *) * (partitions.elements - alter_add) * num_columns); + table->s->stat_trx[partitions.elements * num_columns]= NULL; + } + else + { + /* Prepare part_field_list */ + Field *sys_trx_end= table->vers_end_field(); + part_field_list.push_back(sys_trx_end->field_name.str, thd->mem_root); + DBUG_ASSERT(part_field_list.elements == num_columns); + // needed in handle_list_of_fields() + sys_trx_end->flags|= GET_FIXED_FIELDS_FLAG; + } + + List_iterator<partition_element> it(partitions); + partition_element *el; + MYSQL_TIME t; + memset(&t, 0, sizeof(t)); + my_time_t ts= TIMESTAMP_MAX_VALUE - partitions.elements; + uint32 id= 0; + while ((el= it++)) + { + DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL); + ++ts; + if (alter_add) + { + /* Non-empty historical partitions are left as is. */ + if (el->type() == partition_element::VERSIONING && !el->empty) + { + ++id; + continue; + } + /* Newly added element is inserted before AS_OF_NOW. */ + if (el->id == UINT32_MAX || el->type() == partition_element::AS_OF_NOW) + { + DBUG_ASSERT(table && table->s); + Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root) + Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s); + table->s->stat_trx[id * num_columns + STAT_TRX_END]= stat_trx_end; + el->id= id++; + if (el->type() == partition_element::AS_OF_NOW) + break; + goto set_expression; + } + /* Existing element expression is recalculated. */ + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + for (uint i= 0; i < num_columns; ++i) + { + part_column_list_val &col_val= el->get_col_val(i); + static_cast<Item_datetime_literal *>(col_val.item_expression)->set_time(&t); + col_val.fixed= 0; + } + ++id; + continue; + } + + set_expression: + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + if (vers_set_expression(thd, el, t)) + return true; + } + return false; +} + + +// scan table for min/max sys_trx_end +inline +bool partition_info::vers_scan_min_max(THD *thd, partition_element *part) +{ + uint32 sub_factor= num_subparts ? num_subparts : 1; + uint32 part_id= part->id * sub_factor; + uint32 part_id_end= part_id + sub_factor; + DBUG_ASSERT(part->empty); + DBUG_ASSERT(part->type() == partition_element::VERSIONING); + DBUG_ASSERT(table->s->stat_trx); + for (; part_id < part_id_end; ++part_id) + { + handler *file= table->file->part_handler(part_id); // requires update_partition() for ha_innopart + DBUG_ASSERT(file); + int rc= file->ha_external_lock(thd, F_RDLCK); // requires ha_commit_trans() for ha_innobase + if (rc) + { + file->update_partition(part_id); + goto lock_fail; + } + + table->default_column_bitmaps(); + bitmap_set_bit(table->read_set, table->vers_end_field()->field_index); + file->column_bitmaps_signal(); + + rc= file->ha_rnd_init(true); + if (!rc) + { + while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) + { + if (part->empty) + part->empty= false; + if (thd->killed) + { + file->ha_rnd_end(); + file->update_partition(part_id); + ha_commit_trans(thd, false); + return true; + } + if (rc) + { + if (rc == HA_ERR_RECORD_DELETED) + continue; + break; + } + if (table->vers_end_field()->is_max()) + { + rc= HA_ERR_INTERNAL_ERROR; + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PART_NON_HISTORICAL, + ER_THD(thd, WARN_VERS_PART_NON_HISTORICAL), + part->partition_name); + break; + } + if (table->versioned_by_engine()) + { + uchar buf[8]; + Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); + if (!vers_trx_id_to_ts(thd, table->vers_end_field(), fld)) + { + vers_stat_trx(STAT_TRX_END, part).update_unguarded(&fld); + } + } + else + { + vers_stat_trx(STAT_TRX_END, part).update_unguarded(table->vers_end_field()); + } + } + file->ha_rnd_end(); + } + file->ha_external_lock(thd, F_UNLCK); + file->update_partition(part_id); + if (rc != HA_ERR_END_OF_FILE) + { + ha_commit_trans(thd, false); + lock_fail: + // TODO: print rc code + my_error(ER_INTERNAL_ERROR, MYF(0), "min/max scan failed in versioned partitions setup (see warnings)"); + return true; + } + } + ha_commit_trans(thd, false); + return false; +} + +void partition_info::vers_update_col_vals(THD *thd, partition_element *el0, partition_element *el1) +{ + MYSQL_TIME t; + memset(&t, 0, sizeof(t)); + DBUG_ASSERT(table && table->s && table->s->stat_trx); + DBUG_ASSERT(!el0 || el1->id == el0->id + 1); + const uint idx= el1->id * num_columns; + my_time_t ts; + part_column_list_val *col_val; + Item_datetime_literal *val_item; + Vers_min_max_stats *stat_trx_x; + for (uint i= 0; i < num_columns; ++i) + { + stat_trx_x= table->s->stat_trx[idx + i]; + if (el0) + { + ts= stat_trx_x->min_time(); + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + col_val= &el0->get_col_val(i); + val_item= static_cast<Item_datetime_literal*>(col_val->item_expression); + DBUG_ASSERT(val_item); + if (*val_item > t) + { + val_item->set_time(&t); + col_val->fixed= 0; + } + } + col_val= &el1->get_col_val(i); + if (!col_val->max_value) + { + ts= stat_trx_x->max_time() + 1; + thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); + val_item= static_cast<Item_datetime_literal*>(col_val->item_expression); + DBUG_ASSERT(val_item); + if (*val_item < t) + { + val_item->set_time(&t); + col_val->fixed= 0; + } + } + } +} + + +// setup at open() phase (TABLE_SHARE is initialized) +bool partition_info::vers_setup_stats(THD * thd, bool is_create_table_ind) +{ + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + DBUG_ASSERT(vers_info && vers_info->initialized(false)); + DBUG_ASSERT(table && table->s); + + bool error= false; + + mysql_mutex_lock(&table->s->LOCK_rotation); + if (table->s->busy_rotation) + { + table->s->vers_wait_rotation(); + vers_hist_part(); + } + else + { + table->s->busy_rotation= true; + mysql_mutex_unlock(&table->s->LOCK_rotation); + + DBUG_ASSERT(part_field_list.elements == num_columns); + + bool dont_stat= true; + bool col_val_updated= false; + // initialize stat_trx + if (!table->s->stat_trx) + { + DBUG_ASSERT(partitions.elements > 1); + table->s->stat_trx= static_cast<Vers_min_max_stats**>( + alloc_root(&table->s->mem_root, sizeof(void *) * (partitions.elements * num_columns + 1))); + table->s->stat_trx[partitions.elements * num_columns]= NULL; + dont_stat= false; + } + + // build freelist, scan min/max, assign hist_part + List_iterator<partition_element> it(partitions); + partition_element *el= NULL, *prev; + while ((prev= el, el= it++)) + { + if (el->type() == partition_element::VERSIONING && dont_stat) + { + if (el->id == table->s->hist_part_id) + { + vers_info->hist_part= el; + break; + } + continue; + } + + { + Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root) + Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s); + table->s->stat_trx[el->id * num_columns + STAT_TRX_END]= stat_trx_end; + } + + if (!is_create_table_ind) + { + if (el->type() == partition_element::AS_OF_NOW) + { + uchar buf[8]; + Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); + fld.set_max(); + vers_stat_trx(STAT_TRX_END, el).update_unguarded(&fld); + el->empty= false; + } + else if (vers_scan_min_max(thd, el)) + { + table->s->stat_trx= NULL; // may be a leak on endless table open + error= true; + break; + } + if (!el->empty) + { + vers_update_col_vals(thd, prev, el); + col_val_updated= true; + } + } + + if (el->type() == partition_element::AS_OF_NOW) + break; + + DBUG_ASSERT(el->type() == partition_element::VERSIONING); + + if (vers_info->hist_part) + { + if (!el->empty) + goto set_hist_part; + } + else + { + set_hist_part: + vers_info->hist_part= el; + continue; + } + } // while + + if (!error && !dont_stat) + { + if (col_val_updated) + table->s->stat_serial++; + + table->s->hist_part_id= vers_info->hist_part->id; + if (!is_create_table_ind && (vers_limit_exceed() || vers_interval_exceed())) + vers_part_rotate(thd); + } + mysql_mutex_lock(&table->s->LOCK_rotation); + mysql_cond_broadcast(&table->s->COND_rotation); + table->s->busy_rotation= false; + } + mysql_mutex_unlock(&table->s->LOCK_rotation); + return error; +} + /* Check that the partition/subpartition is setup to use the correct @@ -962,7 +1453,7 @@ error: called for RANGE PARTITIONed tables. */ -bool partition_info::check_range_constants(THD *thd) +bool partition_info::check_range_constants(THD *thd, bool alloc) { partition_element* part_def; bool first= TRUE; @@ -979,12 +1470,15 @@ bool partition_info::check_range_constants(THD *thd) part_column_list_val *UNINIT_VAR(current_largest_col_val); uint num_column_values= part_field_list.elements; uint size_entries= sizeof(part_column_list_val) * num_column_values; - range_col_array= (part_column_list_val*) thd->calloc(num_parts * - size_entries); - if (unlikely(range_col_array == NULL)) + if (alloc) { - mem_alloc_error(num_parts * size_entries); - goto end; + range_col_array= (part_column_list_val*) thd->calloc(num_parts * + size_entries); + if (unlikely(range_col_array == NULL)) + { + mem_alloc_error(num_parts * size_entries); + goto end; + } } loc_range_col_array= range_col_array; i= 0; @@ -1017,11 +1511,14 @@ bool partition_info::check_range_constants(THD *thd) longlong part_range_value; bool signed_flag= !part_expr->unsigned_flag; - range_int_array= (longlong*) thd->alloc(num_parts * sizeof(longlong)); - if (unlikely(range_int_array == NULL)) + if (alloc) { - mem_alloc_error(num_parts * sizeof(longlong)); - goto end; + range_int_array= (longlong*) thd->alloc(num_parts * sizeof(longlong)); + if (unlikely(range_int_array == NULL)) + { + mem_alloc_error(num_parts * sizeof(longlong)); + goto end; + } } i= 0; do @@ -1385,6 +1882,8 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, uint i, tot_partitions; bool result= TRUE, table_engine_set; const char *same_name; + uint32 hist_parts= 0; + uint32 now_parts= 0; DBUG_ENTER("partition_info::check_partition_info"); DBUG_ASSERT(default_engine_type != partition_hton); @@ -1426,7 +1925,8 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, } if (unlikely(is_sub_partitioned() && (!(part_type == RANGE_PARTITION || - part_type == LIST_PARTITION)))) + part_type == LIST_PARTITION || + part_type == VERSIONING_PARTITION)))) { /* Only RANGE and LIST partitioning can be subpartitioned */ my_error(ER_SUBPARTITION_ERROR, MYF(0)); @@ -1488,6 +1988,19 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, my_error(ER_SAME_NAME_PARTITION, MYF(0), same_name); goto end; } + + if (part_type == VERSIONING_PARTITION) + { + DBUG_ASSERT(vers_info); + if (num_parts < 2 || !vers_info->now_part) + { + DBUG_ASSERT(info && info->alias); + my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias); + goto end; + } + DBUG_ASSERT(vers_info->initialized(false)); + DBUG_ASSERT(num_parts == partitions.elements); + } i= 0; { List_iterator<partition_element> part_it(partitions); @@ -1568,6 +2081,18 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, } } } + if (part_type == VERSIONING_PARTITION) + { + if (part_elem->type() == partition_element::VERSIONING) + { + hist_parts++; + } + else + { + DBUG_ASSERT(part_elem->type() == partition_element::AS_OF_NOW); + now_parts++; + } + } } while (++i < num_parts); if (!table_engine_set && num_parts_not_set != 0 && @@ -1599,12 +2124,29 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, if (add_or_reorg_part) { - if (unlikely((part_type == RANGE_PARTITION && + if (unlikely(((part_type == RANGE_PARTITION || part_type == VERSIONING_PARTITION) && check_range_constants(thd)) || (part_type == LIST_PARTITION && check_list_constants(thd)))) goto end; } + + if (hist_parts > 1) + { + if (vers_info->limit == 0 && vers_info->interval == 0) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_PARAMETERS, + ER_THD(thd, WARN_VERS_PARAMETERS), + "no rotation condition for multiple `VERSIONING` partitions."); + } + } + if (now_parts > 1) + { + my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias); + goto end; + } result= FALSE; end: DBUG_RETURN(result); @@ -2814,6 +3356,83 @@ bool partition_info::has_same_partitioning(partition_info *new_part_info) } +static bool has_same_column_order(List<Create_field> *create_list, + Field** field_array) +{ + Field **f_ptr; + List_iterator_fast<Create_field> new_field_it; + Create_field *new_field= NULL; + new_field_it.init(*create_list); + + for (f_ptr= field_array; *f_ptr; f_ptr++) + { + while ((new_field= new_field_it++)) + { + if (new_field->field == *f_ptr) + break; + } + if (!new_field) + break; + } + + if (!new_field) + { + /* Not same order!*/ + return false; + } + return true; +} + +bool partition_info::vers_trx_id_to_ts(THD* thd, Field* in_trx_id, Field_timestamp& out_ts) +{ + DBUG_ASSERT(table); + handlerton *hton= plugin_hton(table->s->db_plugin); + DBUG_ASSERT(hton); + ulonglong trx_id= in_trx_id->val_int(); + TR_table trt(thd); + bool found= trt.query(trx_id); + if (!found) + { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + WARN_VERS_TRX_MISSING, + ER_THD(thd, WARN_VERS_TRX_MISSING), + trx_id); + return true; + } + MYSQL_TIME ts; + trt[TR_table::FLD_COMMIT_TS]->get_date(&ts, 0); + out_ts.store_time_dec(&ts, 6); + return false; +} + + +/** + Check if the partitioning columns are in the same order as the given list. + + Used to see if INPLACE alter can be allowed or not. If the order is + different then the rows must be redistributed for KEY [sub]partitioning. + + @param[in] create_list Column list after ALTER TABLE. + @return true is same order as before ALTER TABLE, else false. +*/ +bool partition_info::same_key_column_order(List<Create_field> *create_list) +{ + /* Only need to check for KEY [sub] partitioning. */ + if (list_of_part_fields && !column_list) + { + if (!has_same_column_order(create_list, part_field_array)) + return false; + } + if (list_of_subpart_fields) + { + if (!has_same_column_order(create_list, subpart_field_array)) + return false; + } + return true; +} + + void partition_info::print_debug(const char *str, uint *value) { DBUG_ENTER("print_debug"); diff --git a/sql/partition_info.h b/sql/partition_info.h index 38a353c8507..6b79fa82fb8 100644 --- a/sql/partition_info.h +++ b/sql/partition_info.h @@ -22,6 +22,7 @@ #include "sql_class.h" #include "partition_element.h" +#include "sql_partition.h" class partition_info; struct TABLE_LIST; @@ -34,6 +35,45 @@ typedef int (*get_subpart_id_func)(partition_info *part_info, struct st_ddl_log_memory_entry; +struct Vers_part_info : public Sql_alloc +{ + Vers_part_info() : + interval(0), + limit(0), + now_part(NULL), + hist_part(NULL), + stat_serial(0) + { + } + Vers_part_info(Vers_part_info &src) : + interval(src.interval), + limit(src.limit), + now_part(NULL), + hist_part(NULL), + stat_serial(src.stat_serial) + { + } + bool initialized(bool fully= true) + { + if (now_part) + { + DBUG_ASSERT(now_part->id != UINT32_MAX); + DBUG_ASSERT(now_part->type() == partition_element::AS_OF_NOW); + DBUG_ASSERT(!fully || (bool) hist_part); + DBUG_ASSERT(!hist_part || ( + hist_part->id != UINT32_MAX && + hist_part->type() == partition_element::VERSIONING)); + return true; + } + return false; + } + my_time_t interval; + ulonglong limit; + partition_element *now_part; + partition_element *hist_part; + ulonglong stat_serial; +}; + class partition_info : public Sql_alloc { public: @@ -143,6 +183,8 @@ public: part_column_list_val *range_col_array; part_column_list_val *list_col_array; }; + + Vers_part_info *vers_info; /******************************************** * INTERVAL ANALYSIS @@ -302,7 +344,7 @@ public: const char *find_duplicate_field(); char *find_duplicate_name(); bool check_engine_mix(handlerton *engine_type, bool default_engine); - bool check_range_constants(THD *thd); + bool check_range_constants(THD *thd, bool alloc= true); bool check_list_constants(THD *thd); bool check_partition_info(THD *thd, handlerton **eng_type, handler *file, HA_CREATE_INFO *info, @@ -332,6 +374,28 @@ public: size_t file_name_size, uint32 *part_id); void report_part_expr_error(bool use_subpart_expr); bool has_same_partitioning(partition_info *new_part_info); + inline bool is_partition_used(uint part_id) const + { + return bitmap_is_set(&read_partitions, part_id); + } + inline bool is_partition_locked(uint part_id) const + { + return bitmap_is_set(&lock_partitions, part_id); + } + inline uint num_partitions_used() + { + return bitmap_bits_set(&read_partitions); + } + inline uint get_first_used_partition() const + { + return bitmap_get_first_set(&read_partitions); + } + inline uint get_next_used_partition(uint part_id) const + { + return bitmap_get_next_set(&read_partitions, part_id); + } + bool same_key_column_order(List<Create_field> *create_list); + private: static int list_part_cmp(const void* a, const void* b); bool set_up_default_partitions(THD *thd, handler *file, HA_CREATE_INFO *info, @@ -342,10 +406,167 @@ private: uint start_no); char *create_default_subpartition_name(THD *thd, uint subpart_no, const char *part_name); + // FIXME: prune_partition_bitmaps() is duplicate of set_read_partitions() bool prune_partition_bitmaps(TABLE_LIST *table_list); bool add_named_partition(const char *part_name, uint length); public: + bool set_read_partitions(List<char> *partition_names); bool has_unique_name(partition_element *element); + + bool vers_init_info(THD *thd); + bool vers_set_interval(const INTERVAL &i); + bool vers_set_limit(ulonglong limit); + partition_element* vers_part_rotate(THD *thd); + bool vers_set_expression(THD *thd, partition_element *el, MYSQL_TIME &t); + bool vers_setup_expression(THD *thd, uint32 alter_add= 0); /* Stage 1. */ + bool vers_setup_stats(THD *thd, bool is_create_table_ind); /* Stage 2. */ + bool vers_scan_min_max(THD *thd, partition_element *part); + void vers_update_col_vals(THD *thd, partition_element *el0, partition_element *el1); + + partition_element *vers_hist_part() + { + DBUG_ASSERT(table && table->s); + DBUG_ASSERT(vers_info && vers_info->initialized()); + DBUG_ASSERT(table->s->hist_part_id != UINT32_MAX); + if (table->s->hist_part_id == vers_info->hist_part->id) + return vers_info->hist_part; + + List_iterator<partition_element> it(partitions); + partition_element *el; + while ((el= it++)) + { + DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL); + if (el->type() == partition_element::VERSIONING && + el->id == table->s->hist_part_id) + { + vers_info->hist_part= el; + return vers_info->hist_part; + } + } + DBUG_ASSERT(0); + return NULL; + } + partition_element *get_partition(uint part_id) + { + List_iterator<partition_element> it(partitions); + partition_element *el; + while ((el= it++)) + { + if (el->id == part_id) + return el; + } + return NULL; + } + bool vers_limit_exceed(partition_element *part= NULL) + { + DBUG_ASSERT(vers_info); + if (!vers_info->limit) + return false; + if (!part) + { + DBUG_ASSERT(vers_info->initialized()); + part= vers_hist_part(); + } + // TODO: cache thread-shared part_recs and increment on INSERT + return table->file->part_records(part) >= vers_info->limit; + } + Vers_min_max_stats& vers_stat_trx(stat_trx_field fld, uint32 part_element_id) + { + DBUG_ASSERT(table && table->s && table->s->stat_trx); + Vers_min_max_stats* res= table->s->stat_trx[part_element_id * num_columns + fld]; + DBUG_ASSERT(res); + return *res; + } + Vers_min_max_stats& vers_stat_trx(stat_trx_field fld, partition_element *part) + { + DBUG_ASSERT(part); + return vers_stat_trx(fld, part->id); + } + bool vers_interval_exceed(my_time_t max_time, partition_element *part= NULL) + { + DBUG_ASSERT(vers_info); + if (!vers_info->interval) + return false; + if (!part) + { + DBUG_ASSERT(vers_info->initialized()); + part= vers_hist_part(); + } + my_time_t min_time= vers_stat_trx(STAT_TRX_END, part).min_time(); + return max_time - min_time > vers_info->interval; + } + bool vers_interval_exceed(partition_element *part) + { + return vers_interval_exceed(vers_stat_trx(STAT_TRX_END, part).max_time(), part); + } + bool vers_interval_exceed() + { + return vers_interval_exceed(vers_hist_part()); + } + bool vers_trx_id_to_ts(THD *thd, Field *in_trx_id, Field_timestamp &out_ts); + void vers_update_stats(THD *thd, partition_element *el) + { + DBUG_ASSERT(vers_info && vers_info->initialized()); + DBUG_ASSERT(table && table->s); + DBUG_ASSERT(el && el->type() == partition_element::VERSIONING); + bool updated; + mysql_rwlock_wrlock(&table->s->LOCK_stat_serial); + el->empty= false; + if (table->versioned_by_engine()) + { + // transaction is not yet pushed to VTQ, so we use now-time + my_time_t end_ts= my_time_t(0); + + uchar buf[8]; + Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); + fld.store_TIME(end_ts, 0); + updated= + vers_stat_trx(STAT_TRX_END, el->id).update(&fld); + } + else + { + updated= + vers_stat_trx(STAT_TRX_END, el->id).update(table->vers_end_field()); + } + if (updated) + table->s->stat_serial++; + mysql_rwlock_unlock(&table->s->LOCK_stat_serial); + if (updated) + { + vers_update_col_vals(thd, + el->id > 0 ? get_partition(el->id - 1) : NULL, + el); + } + } + void vers_update_stats(THD *thd, uint part_id) + { + DBUG_ASSERT(vers_info && vers_info->initialized()); + if (part_id < vers_info->now_part->id) + vers_update_stats(thd, get_partition(part_id)); + } + bool vers_update_range_constants(THD *thd) + { + DBUG_ASSERT(vers_info && vers_info->initialized()); + DBUG_ASSERT(table && table->s); + + mysql_rwlock_rdlock(&table->s->LOCK_stat_serial); + if (vers_info->stat_serial == table->s->stat_serial) + { + mysql_rwlock_unlock(&table->s->LOCK_stat_serial); + return false; + } + + bool result= false; + for (uint i= 0; i < num_columns; ++i) + { + Field *f= part_field_array[i]; + bitmap_set_bit(f->table->write_set, f->field_index); + } + result= check_range_constants(thd, false); + vers_info->stat_serial= table->s->stat_serial; + mysql_rwlock_unlock(&table->s->LOCK_stat_serial); + return result; + } }; uint32 get_next_partition_id_range(struct st_partition_iter* part_iter); diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 516d009a69e..e12f6cb258b 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -5752,8 +5752,8 @@ ER_MAX_PREPARED_STMT_COUNT_REACHED 42000 eng "Can't create more than max_prepared_stmt_count statements (current value: %lu)" ger "Kann nicht mehr Anweisungen als max_prepared_stmt_count erzeugen (aktueller Wert: %lu)" ER_VIEW_RECURSIVE - eng "`%-.192s`.`%-.192s` contains view recursion" - ger "`%-.192s`.`%-.192s` enthält View-Rekursion" + eng "%`s.%`s contains view recursion" + ger "%`s.%`s enthält View-Rekursion" ER_NON_GROUPING_FIELD_USED 42000 eng "Non-grouping field '%-.192s' is used in %-.64s clause" ger "In der %-.192s-Klausel wird das die Nicht-Gruppierungsspalte '%-.64s' verwendet" @@ -6249,37 +6249,37 @@ ER_BINLOG_LOGGING_IMPOSSIBLE eng "Binary logging not possible. Message: %s" ger "Binärlogging nicht möglich. Meldung: %s" ER_VIEW_NO_CREATION_CTX - eng "View `%-.64s`.`%-.64s` has no creation context" - ger "View `%-.64s`.`%-.64s` hat keinen Erzeugungskontext" + eng "View %`s.%`s has no creation context" + ger "View %`s.%`s hat keinen Erzeugungskontext" ER_VIEW_INVALID_CREATION_CTX - eng "Creation context of view `%-.64s`.`%-.64s' is invalid" - ger "Erzeugungskontext des Views`%-.64s`.`%-.64s' ist ungültig" + eng "Creation context of view %`s.%`s is invalid" + ger "Erzeugungskontext des Views%`s.%`s ist ungültig" ER_SR_INVALID_CREATION_CTX - eng "Creation context of stored routine `%-.64s`.`%-.64s` is invalid" - ger "Erzeugungskontext der gespeicherten Routine`%-.64s`.`%-.64s` ist ungültig" + eng "Creation context of stored routine %`s.%`s is invalid" + ger "Erzeugungskontext der gespeicherten Routine%`s.%`s ist ungültig" ER_TRG_CORRUPTED_FILE - eng "Corrupted TRG file for table `%-.64s`.`%-.64s`" - ger "Beschädigte TRG-Datei für Tabelle `%-.64s`.`%-.64s`" + eng "Corrupted TRG file for table %`s.%`s" + ger "Beschädigte TRG-Datei für Tabelle %`s.%`s" ER_TRG_NO_CREATION_CTX - eng "Triggers for table `%-.64s`.`%-.64s` have no creation context" - ger "Trigger für Tabelle `%-.64s`.`%-.64s` haben keinen Erzeugungskontext" + eng "Triggers for table %`s.%`s have no creation context" + ger "Trigger für Tabelle %`s.%`s haben keinen Erzeugungskontext" ER_TRG_INVALID_CREATION_CTX - eng "Trigger creation context of table `%-.64s`.`%-.64s` is invalid" - ger "Trigger-Erzeugungskontext der Tabelle `%-.64s`.`%-.64s` ist ungültig" + eng "Trigger creation context of table %`s.%`s is invalid" + ger "Trigger-Erzeugungskontext der Tabelle %`s.%`s ist ungültig" ER_EVENT_INVALID_CREATION_CTX - eng "Creation context of event `%-.64s`.`%-.64s` is invalid" - ger "Erzeugungskontext des Events `%-.64s`.`%-.64s` ist ungültig" + eng "Creation context of event %`s.%`s is invalid" + ger "Erzeugungskontext des Events %`s.%`s ist ungültig" ER_TRG_CANT_OPEN_TABLE - eng "Cannot open table for trigger `%-.64s`.`%-.64s`" - ger "Kann Tabelle für den Trigger `%-.64s`.`%-.64s` nicht öffnen" + eng "Cannot open table for trigger %`s.%`s" + ger "Kann Tabelle für den Trigger %`s.%`s nicht öffnen" ER_CANT_CREATE_SROUTINE - eng "Cannot create stored routine `%-.64s`. Check warnings" - ger "Kann gespeicherte Routine `%-.64s` nicht erzeugen. Beachten Sie die Warnungen" + eng "Cannot create stored routine %`s. Check warnings" + ger "Kann gespeicherte Routine %`s nicht erzeugen. Beachten Sie die Warnungen" ER_UNUSED_11 eng "You should never see it" ER_NO_FORMAT_DESCRIPTION_EVENT_BEFORE_BINLOG_STATEMENT - eng "The BINLOG statement of type `%s` was not preceded by a format description BINLOG statement" - ger "Der BINLOG-Anweisung vom Typ `%s` ging keine BINLOG-Anweisung zur Formatbeschreibung voran" + eng "The BINLOG statement of type %s was not preceded by a format description BINLOG statement" + ger "Der BINLOG-Anweisung vom Typ %s ging keine BINLOG-Anweisung zur Formatbeschreibung voran" ER_SLAVE_CORRUPT_EVENT eng "Corrupted replication event was detected" ger "Beschädigtes Replikationsereignis entdeckt" @@ -6596,7 +6596,7 @@ ER_MULTI_UPDATE_KEY_CONFLICT # When translating this error message make sure to include "ALTER TABLE" in the # message as mysqlcheck parses the error message looking for ALTER TABLE. ER_TABLE_NEEDS_REBUILD - eng "Table rebuild required. Please do \"ALTER TABLE `%-.32s` FORCE\" or dump/reload to fix it!" + eng "Table rebuild required. Please do \"ALTER TABLE %`s FORCE\" or dump/reload to fix it!" WARN_OPTION_BELOW_LIMIT eng "The value of '%s' should be no less than the value of '%s'" @@ -7809,3 +7809,119 @@ ER_INVALID_AGGREGATE_FUNCTION eng "Aggregate specific instruction(FETCH GROUP NEXT ROW) missing from the aggregate function" ER_INVALID_VALUE_TO_LIMIT eng "Limit only accepts integer values" + +# MariaDB error numbers related to System Versioning + +ER_VERSIONING_REQUIRED + eng "System Versioning required: %s" + +ER_VERS_READONLY_FIELD + eng "System field %`s is read-only" + +ER_UPDATE_INFO_WITH_SYSTEM_VERSIONING + eng "Rows matched: %ld Changed: %ld Inserted: %ld Warnings: %ld" + +ER_VERS_FIELD_WRONG_TYPE + eng "%`s must be of type %s for versioned table %`s" + +ER_VERS_ENGINE_UNSUPPORTED + eng "Engine does not support System Versioning for %`s" + +ER_VERS_RANGE_UNITS_MISMATCH + eng "Range units mismatch" + +ER_NON_VERSIONED_FIELD_IN_VERSIONED_QUERY + eng "Attempt to read unversioned field %`s in historical query" + +ER_PARTITION_WRONG_TYPE + eng "Wrong partition type, expected type: %`s" + +WARN_VERS_PART_FULL + eng "Using full partition %`s, need more VERSIONING partitions!" + +WARN_VERS_PARAMETERS + eng "Maybe missing parameters: %s" + +WARN_VERS_PART_ROTATION + eng "Switching from partition %`s to %`s" + +WARN_VERS_TRX_MISSING + eng "VTQ missing transaction ID %lu" + +WARN_VERS_PART_NON_HISTORICAL + eng "Partition %`s contains non-historical data" + +ER_VERS_ALTER_NOT_ALLOWED + eng "Not allowed for versioned %`s.%`s. Change `versioning_alter_history` to proceed with ALTER." + +ER_VERS_ALTER_ENGINE_PROHIBITED + eng "Not allowed for versioned %`s.%`s. Change to/from native versioning engine is prohibited." + +ER_VERS_RANGE_PROHIBITED + eng "SYSTEM_TIME range selector is prohibited" + +ER_VERS_VIEW_PROHIBITED + eng "Creating VIEW %`s is prohibited!" + +ER_VERS_DERIVED_PROHIBITED + eng "Derived table is prohibited!" + +ER_VERS_SYSTEM_TIME_CLASH + eng "SYSTEM_TIME is not allowed outside historical %`s" + +ER_VERS_UNUSED_CLAUSE + eng "Unused clause: '%s'" + +WARN_VERS_ALIAS_TOO_LONG + eng "Auto generated alias for %`s.%`s is too long; using %`s" + +ER_VERS_VTMD_ERROR + eng "VTMD error: %s" + +ER_VERS_DIFFERENT_TABLES + eng "Wrong parameters for %`s: system fields selected from different tables" + +ER_VERS_NO_COLS_DEFINED + eng "Table %`s has no versioned columns" + +ER_VERS_NOT_VERSIONED + eng "Table %`s is not versioned" + +ER_MISSING + eng "Wrong parameters for %`s: missing '%s'" + +ER_VERS_PERIOD_COLUMNS + eng "PERIOD FOR SYSTEM_TIME must use columns %`s and %`s" + +ER_PART_WRONG_VALUE + eng "Wrong parameters for partitioned %`s: wrong value for '%s'" + +ER_VERS_WRONG_PARTS + eng "Wrong partitions consistency for %`s: must have at least one 'VERSIONING' and exactly one last 'AS OF NOW'" + +ER_VERS_HISTORY_LOCK + eng "Versioned SELECT write-locking of history rows" + +ER_VERS_NO_TRX_ID + eng "TRX_ID %lu not found in `mysql.transaction_registry`" + +ER_WRONG_TABLESPACE_NAME 42000 + eng "Incorrect tablespace name %`s" + +ER_VERS_ALTER_SYSTEM_FIELD + eng "Can not change system versioning field %`s" + +ER_VERS_SYS_FIELD_NOT_HIDDEN + eng "System versioning field %`s is not hidden" + +ER_NOT_LOG_TABLE + eng "Table %`s.%`s is not a log table" + +ER_VERS_GENERATED_ALWAYS_NOT_EMPTY + eng "Can not modify column %`s to GENERATED ALWAYS AS ROW START/END for non-empty table" + +ER_VERS_TRT_IS_DISABLED + eng "Some versioned DML requires `transaction_registry` to be set to ON." + +ER_VERS_DUPLICATE_ROW_START_END + eng "Duplicate ROW %s column %`s" diff --git a/sql/sp_cache.cc b/sql/sp_cache.cc index 342673bf619..cf890f316bc 100644 --- a/sql/sp_cache.cc +++ b/sql/sp_cache.cc @@ -238,6 +238,10 @@ void sp_cache_flush_obsolete(sp_cache **cp, sp_head **sp) } } +void sp_cache_flush(sp_cache *cp, sp_head *sp) +{ + cp->remove(sp); +} /** Return the current global version of the cache. diff --git a/sql/sp_cache.h b/sql/sp_cache.h index a045ff5d3c5..59e0fc186dd 100644 --- a/sql/sp_cache.h +++ b/sql/sp_cache.h @@ -60,6 +60,7 @@ void sp_cache_insert(sp_cache **cp, sp_head *sp); sp_head *sp_cache_lookup(sp_cache **cp, const Database_qualified_name *name); void sp_cache_invalidate(); void sp_cache_flush_obsolete(sp_cache **cp, sp_head **sp); +void sp_cache_flush(sp_cache *cp, sp_head *sp); ulong sp_cache_version(); void sp_cache_enforce_limit(sp_cache *cp, ulong upper_limit_for_elements); diff --git a/sql/sp_head.cc b/sql/sp_head.cc index a0f0fb271ef..28e4442dd0c 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -2542,6 +2542,7 @@ sp_head::restore_thd_mem_root(THD *thd) Item *flist= free_list; // The old list set_query_arena(thd); // Get new free_list and mem_root state= STMT_INITIALIZED_FOR_SP; + is_stored_procedure= true; DBUG_PRINT("info", ("mem_root %p returned from thd mem root %p", &mem_root, &thd->mem_root)); diff --git a/sql/sp_head.h b/sql/sp_head.h index 7e477544958..04b67941226 100644 --- a/sql/sp_head.h +++ b/sql/sp_head.h @@ -213,7 +213,7 @@ public: ulong sp_cache_version() const { return m_sp_cache_version; } /** Set the value of the SP cache version. */ - void set_sp_cache_version(ulong version_arg) + void set_sp_cache_version(ulong version_arg) const { m_sp_cache_version= version_arg; } @@ -235,7 +235,7 @@ private: is obsolete and should not be used -- sp_cache_flush_obsolete() will purge it. */ - ulong m_sp_cache_version; + mutable ulong m_sp_cache_version; Stored_program_creation_ctx *m_creation_ctx; /** Boolean combination of (1<<flag), where flag is a member of diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 9d3299a63d4..307a6badd70 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -170,6 +170,11 @@ TABLE_FIELD_TYPE mysql_db_table_fields[MYSQL_DB_FIELD_COUNT] = { { C_STRING_WITH_LEN("Trigger_priv") }, { C_STRING_WITH_LEN("enum('N','Y')") }, { C_STRING_WITH_LEN("utf8") } + }, + { + { C_STRING_WITH_LEN("Truncate_versioning_priv") }, + { C_STRING_WITH_LEN("enum('N','Y')") }, + { C_STRING_WITH_LEN("utf8") } } }; @@ -695,9 +700,9 @@ bool ROLE_GRANT_PAIR::init(MEM_ROOT *mem, const char *username, #endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */ #define NORMAL_HANDSHAKE_SIZE 6 -#define ROLE_ASSIGN_COLUMN_IDX 43 -#define DEFAULT_ROLE_COLUMN_IDX 44 -#define MAX_STATEMENT_TIME_COLUMN_IDX 45 +#define ROLE_ASSIGN_COLUMN_IDX 44 +#define DEFAULT_ROLE_COLUMN_IDX 45 +#define MAX_STATEMENT_TIME_COLUMN_IDX 46 /* various flags valid for ACL_USER */ #define IS_ROLE (1L << 0) @@ -2013,6 +2018,9 @@ static bool acl_load(THD *thd, const Grant_tables& tables) if (user_table.num_fields() <= 38 && (user.access & SUPER_ACL)) user.access|= TRIGGER_ACL; + if (user_table.num_fields() <= 46 && (user.access & DELETE_ACL)) + user.access|= DELETE_VERSIONING_ROWS_ACL; + user.sort= get_sort(2, user.host.hostname, user.user.str); user.hostname_length= safe_strlen(user.host.hostname); user.user_resource.user_conn= 0; @@ -8469,13 +8477,14 @@ static const char *command_array[]= "ALTER", "SHOW DATABASES", "SUPER", "CREATE TEMPORARY TABLES", "LOCK TABLES", "EXECUTE", "REPLICATION SLAVE", "REPLICATION CLIENT", "CREATE VIEW", "SHOW VIEW", "CREATE ROUTINE", "ALTER ROUTINE", - "CREATE USER", "EVENT", "TRIGGER", "CREATE TABLESPACE" + "CREATE USER", "EVENT", "TRIGGER", "CREATE TABLESPACE", + "DELETE VERSIONING ROWS" }; static uint command_lengths[]= { 6, 6, 6, 6, 6, 4, 6, 8, 7, 4, 5, 10, 5, 5, 14, 5, 23, 11, 7, 17, 18, 11, 9, - 14, 13, 11, 5, 7, 17 + 14, 13, 11, 5, 7, 17, 22, }; diff --git a/sql/sql_acl.h b/sql/sql_acl.h index e3dba20422d..49a7844108e 100644 --- a/sql/sql_acl.h +++ b/sql/sql_acl.h @@ -49,6 +49,7 @@ #define EVENT_ACL (1UL << 26) #define TRIGGER_ACL (1UL << 27) #define CREATE_TABLESPACE_ACL (1UL << 28) +#define DELETE_VERSIONING_ROWS_ACL (1UL << 29) /* don't forget to update 1. static struct show_privileges_st sys_privileges[] @@ -62,12 +63,13 @@ (UPDATE_ACL | SELECT_ACL | INSERT_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_TMP_ACL | \ LOCK_TABLES_ACL | EXECUTE_ACL | CREATE_VIEW_ACL | SHOW_VIEW_ACL | \ - CREATE_PROC_ACL | ALTER_PROC_ACL | EVENT_ACL | TRIGGER_ACL) + CREATE_PROC_ACL | ALTER_PROC_ACL | EVENT_ACL | TRIGGER_ACL | \ + DELETE_VERSIONING_ROWS_ACL) #define TABLE_ACLS \ (SELECT_ACL | INSERT_ACL | UPDATE_ACL | DELETE_ACL | CREATE_ACL | DROP_ACL | \ GRANT_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL | CREATE_VIEW_ACL | \ - SHOW_VIEW_ACL | TRIGGER_ACL) + SHOW_VIEW_ACL | TRIGGER_ACL | DELETE_VERSIONING_ROWS_ACL) #define COL_ACLS \ (SELECT_ACL | INSERT_ACL | UPDATE_ACL | REFERENCES_ACL) @@ -85,7 +87,7 @@ CREATE_TMP_ACL | LOCK_TABLES_ACL | REPL_SLAVE_ACL | REPL_CLIENT_ACL | \ EXECUTE_ACL | CREATE_VIEW_ACL | SHOW_VIEW_ACL | CREATE_PROC_ACL | \ ALTER_PROC_ACL | CREATE_USER_ACL | EVENT_ACL | TRIGGER_ACL | \ - CREATE_TABLESPACE_ACL) + CREATE_TABLESPACE_ACL | DELETE_VERSIONING_ROWS_ACL) #define DEFAULT_CREATE_PROC_ACLS \ (ALTER_PROC_ACL | EXECUTE_ACL) @@ -117,31 +119,37 @@ CREATE_PROC_ACL | ALTER_PROC_ACL ) #define DB_CHUNK4 (EXECUTE_ACL) #define DB_CHUNK5 (EVENT_ACL | TRIGGER_ACL) +#define DB_CHUNK6 (DELETE_VERSIONING_ROWS_ACL) #define fix_rights_for_db(A) (((A) & DB_CHUNK0) | \ (((A) << 4) & DB_CHUNK1) | \ (((A) << 6) & DB_CHUNK2) | \ (((A) << 9) & DB_CHUNK3) | \ - (((A) << 2) & DB_CHUNK4))| \ - (((A) << 9) & DB_CHUNK5) + (((A) << 2) & DB_CHUNK4) | \ + (((A) << 9) & DB_CHUNK5) | \ + (((A) << 10) & DB_CHUNK6)) #define get_rights_for_db(A) (((A) & DB_CHUNK0) | \ (((A) & DB_CHUNK1) >> 4) | \ (((A) & DB_CHUNK2) >> 6) | \ (((A) & DB_CHUNK3) >> 9) | \ - (((A) & DB_CHUNK4) >> 2))| \ - (((A) & DB_CHUNK5) >> 9) + (((A) & DB_CHUNK4) >> 2) | \ + (((A) & DB_CHUNK5) >> 9) | \ + (((A) & DB_CHUNK6) >> 10)) #define TBL_CHUNK0 DB_CHUNK0 #define TBL_CHUNK1 DB_CHUNK1 #define TBL_CHUNK2 (CREATE_VIEW_ACL | SHOW_VIEW_ACL) #define TBL_CHUNK3 TRIGGER_ACL +#define TBL_CHUNK4 (DELETE_VERSIONING_ROWS_ACL) #define fix_rights_for_table(A) (((A) & TBL_CHUNK0) | \ (((A) << 4) & TBL_CHUNK1) | \ (((A) << 11) & TBL_CHUNK2) | \ - (((A) << 15) & TBL_CHUNK3)) + (((A) << 15) & TBL_CHUNK3) | \ + (((A) << 16) & TBL_CHUNK4)) #define get_rights_for_table(A) (((A) & TBL_CHUNK0) | \ (((A) & TBL_CHUNK1) >> 4) | \ (((A) & TBL_CHUNK2) >> 11) | \ - (((A) & TBL_CHUNK3) >> 15)) + (((A) & TBL_CHUNK3) >> 15) | \ + (((A) & TBL_CHUNK4) >> 16)) #define fix_rights_for_column(A) (((A) & 7) | (((A) & ~7) << 8)) #define get_rights_for_column(A) (((A) & 7) | ((A) >> 8)) #define fix_rights_for_procedure(A) ((((A) << 18) & EXECUTE_ACL) | \ @@ -175,6 +183,7 @@ enum mysql_db_table_field MYSQL_DB_FIELD_EXECUTE_PRIV, MYSQL_DB_FIELD_EVENT_PRIV, MYSQL_DB_FIELD_TRIGGER_PRIV, + MYSQL_DB_FIELD_DELETE_VERSIONING_ROWS_PRIV, MYSQL_DB_FIELD_COUNT }; diff --git a/sql/sql_alter.h b/sql/sql_alter.h index 25e377be8de..75435c7f275 100644 --- a/sql/sql_alter.h +++ b/sql/sql_alter.h @@ -100,6 +100,27 @@ public: enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE }; + bool data_modifying() const + { + return flags & ( + ALTER_ADD_COLUMN | + ALTER_DROP_COLUMN | + ALTER_CHANGE_COLUMN | + ALTER_COLUMN_ORDER); + } + + bool partition_modifying() const + { + return flags & ( + ALTER_DROP_PARTITION | + ALTER_COALESCE_PARTITION | + ALTER_REORGANIZE_PARTITION | + ALTER_REMOVE_PARTITIONING | + ALTER_TABLE_REORG | + ALTER_EXCHANGE_PARTITION | + ALTER_TRUNCATE_PARTITION); + } + /** The different values of the ALGORITHM clause. Describes which algorithm to use when altering the table. diff --git a/sql/sql_base.cc b/sql/sql_base.cc index a08078c5cff..4f4ea2a864c 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -5738,6 +5738,7 @@ find_field_in_table_ref(THD *thd, TABLE_LIST *table_list, if (field_to_set) { TABLE *table= field_to_set->table; + DBUG_ASSERT(table); if (thd->mark_used_columns == MARK_COLUMNS_READ) bitmap_set_bit(table->read_set, field_to_set->field_index); else @@ -6415,6 +6416,19 @@ mark_common_columns(THD *thd, TABLE_LIST *table_ref_1, TABLE_LIST *table_ref_2, bool is_using_column_1; if (!(nj_col_1= it_1.get_or_create_column_ref(thd, leaf_1))) goto err; + + if (nj_col_1->field() && nj_col_1->field()->vers_sys_field()) + continue; + + if (table_ref_1->is_view() && table_ref_1->table->versioned()) + { + Item *item= nj_col_1->view_field->item; + DBUG_ASSERT(item->type() == Item::FIELD_ITEM); + Item_field *item_field= (Item_field *)item; + if (item_field->field->vers_sys_field()) + continue; + } + field_name_1= nj_col_1->name(); is_using_column_1= using_fields && test_if_string_in_list(field_name_1->str, using_fields); @@ -7109,7 +7123,7 @@ bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array, thd->lex->current_select->is_item_list_lookup= 0; /* - To prevent fail on forward lookup we fill it with zerows, + To prevent fail on forward lookup we fill it with zeroes, then if we got pointer on zero after find_item_in_list we will know that it is forward lookup. @@ -7509,6 +7523,7 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, Field_iterator_table_ref field_iterator; bool found; char name_buff[SAFE_NAME_LEN+1]; + ulong vers_hide= thd->variables.vers_hide; DBUG_ENTER("insert_fields"); DBUG_PRINT("arena", ("stmt arena: %p",thd->stmt_arena)); @@ -7612,6 +7627,52 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name, if (!(item= field_iterator.create_item(thd))) DBUG_RETURN(TRUE); + if (item->type() == Item::FIELD_ITEM) + { + Item_field *f= static_cast<Item_field *>(item); + DBUG_ASSERT(f->field); + uint32 fl= f->field->flags; + bool sys_field= fl & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG); + SELECT_LEX *slex= thd->lex->current_select; + TABLE *table= f->field->table; + DBUG_ASSERT(table && table->pos_in_table_list); + TABLE_LIST *tl= table->pos_in_table_list; + vers_range_type_t vers_type= tl->vers_conditions.type; + + enum_sql_command sql_command= thd->lex->sql_command; + unsigned int create_options= thd->lex->create_info.options; + + if ( + sql_command == SQLCOM_CREATE_TABLE ? + sys_field && !(create_options & HA_VERSIONED_TABLE) : ( + sys_field ? + (sql_command == SQLCOM_CREATE_VIEW || + slex->nest_level > 0 || + vers_hide == VERS_HIDE_FULL || + ((fl & HIDDEN_FLAG) && ( + vers_hide == VERS_HIDE_IMPLICIT || + (vers_hide == VERS_HIDE_AUTO && ( + vers_type == FOR_SYSTEM_TIME_UNSPECIFIED || + vers_type == FOR_SYSTEM_TIME_AS_OF))))) : + (fl & HIDDEN_FLAG))) + { + continue; + } + } + else if (item->type() == Item::REF_ITEM) + { + Item *i= item; + while (i->type() == Item::REF_ITEM) + i= *((Item_ref *)i)->ref; + if (i->type() == Item::FIELD_ITEM) + { + Item_field *f= (Item_field *)i; + DBUG_ASSERT(f->field); + if (f->field->flags & HIDDEN_FLAG) + continue; + } + } + /* cache the table for the Item_fields inserted by expanding stars */ if (item->type() == Item::FIELD_ITEM && tables->cacheable_table) ((Item_field *)item)->cached_table= tables; @@ -7990,6 +8051,7 @@ fill_record(THD *thd, TABLE *table_arg, List<Item> &fields, List<Item> &values, goto err; } value=v++; + DBUG_ASSERT(value); Field *rfield= field->field; TABLE* table= rfield->table; if (table->next_number_field && @@ -8006,6 +8068,15 @@ fill_record(THD *thd, TABLE *table_arg, List<Item> &fields, List<Item> &values, ER_THD(thd, ER_WARNING_NON_DEFAULT_VALUE_FOR_VIRTUAL_COLUMN), rfield->field_name.str, table->s->table_name.str); } + if (table->versioned() && rfield->vers_sys_field() && + !ignore_errors) + { + if (type == Item::DEFAULT_VALUE_ITEM) + continue; + my_error(ER_VERS_READONLY_FIELD, MYF(0), rfield->field_name.str); + goto err; + } + if (rfield->stored_in_db() && (value->save_in_field(rfield, 0)) < 0 && !ignore_errors) { @@ -8074,7 +8145,7 @@ void switch_defaults_to_nullable_trigger_fields(TABLE *table) Field **trigger_field= table->field_to_fill(); /* True if we have NOT NULL fields and BEFORE triggers */ - if (trigger_field != table->field) + if (*trigger_field != *table->field) { for (Field **field_ptr= table->default_field; *field_ptr ; field_ptr++) { @@ -8236,6 +8307,12 @@ fill_record(THD *thd, TABLE *table, Field **ptr, List<Item> &values, /* Ensure that all fields are from the same table */ DBUG_ASSERT(field->table == table); + if (table->versioned() && field->vers_sys_field() && !ignore_errors) + { + my_error(ER_VERS_READONLY_FIELD, MYF(0), field->field_name.str); + goto err; + } + value=v++; if (field->field_index == autoinc_index) table->auto_increment_field_not_null= TRUE; @@ -8426,7 +8503,6 @@ int init_ftfuncs(THD *thd, SELECT_LEX *select_lex, bool no_order) { List_iterator<Item_func_match> li(*(select_lex->ftfunc_list)); Item_func_match *ifm; - DBUG_PRINT("info",("Performing FULLTEXT search")); while ((ifm=li++)) if (ifm->init_search(thd, no_order)) @@ -8616,10 +8692,31 @@ open_log_table(THD *thd, TABLE_LIST *one_table, Open_tables_backup *backup) if ((table= open_ltable(thd, one_table, one_table->lock_type, flags))) { - DBUG_ASSERT(table->s->table_category == TABLE_CATEGORY_LOG); - /* Make sure all columns get assigned to a default value */ - table->use_all_columns(); - DBUG_ASSERT(table->s->no_replicate); + if (table->s->table_category == TABLE_CATEGORY_LOG) + { + /* Make sure all columns get assigned to a default value */ + table->use_all_columns(); + DBUG_ASSERT(table->s->no_replicate); + } + else + { + my_error(ER_NOT_LOG_TABLE, MYF(0), table->s->db.str, table->s->table_name.str); + int error= 0; + if (table->current_lock != F_UNLCK) + { + table->current_lock= F_UNLCK; + error= table->file->ha_external_lock(thd, F_UNLCK); + } + if (error) + table->file->print_error(error, MYF(0)); + else + { + tc_release_table(table); + thd->reset_open_tables_state(thd); + thd->restore_backup_open_tables_state(backup); + table= NULL; + } + } } else thd->restore_backup_open_tables_state(backup); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 628671802a0..051b7366372 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -710,7 +710,6 @@ extern "C" void thd_kill_timeout(THD* thd) mysql_mutex_unlock(&thd->LOCK_thd_data); } - THD::THD(my_thread_id id, bool is_wsrep_applier) :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, /* statement id */ 0), @@ -3709,6 +3708,7 @@ void Query_arena::set_query_arena(Query_arena *set) mem_root= set->mem_root; free_list= set->free_list; state= set->state; + is_stored_procedure= set->is_stored_procedure; } @@ -4779,7 +4779,7 @@ extern "C" int thd_slave_thread(const MYSQL_THD thd) of the current query. */ extern "C" unsigned long long thd_start_utime(const MYSQL_THD thd) { - return thd->start_utime; + return thd->start_time * 1000000 + thd->start_time_sec_part; } @@ -7050,6 +7050,15 @@ static bool protect_against_unsafe_warning_flood(int unsafe_type) DBUG_RETURN(unsafe_warning_suppression_active[unsafe_type]); } +MYSQL_TIME THD::query_start_TIME() +{ + MYSQL_TIME res; + variables.time_zone->gmt_sec_to_TIME(&res, query_start()); + res.second_part= query_start_sec_part(); + time_zone_used= 1; + return res; +} + /** Auxiliary method used by @c binlog_query() to raise warnings. @@ -7669,3 +7678,16 @@ void Database_qualified_name::copy(MEM_ROOT *mem_root, #endif /* !defined(MYSQL_CLIENT) */ + + +Query_arena_stmt::Query_arena_stmt(THD *_thd) : + thd(_thd) +{ + arena= thd->activate_stmt_arena_if_needed(&backup); +} + +Query_arena_stmt::~Query_arena_stmt() +{ + if (arena) + thd->restore_active_arena(arena, &backup); +} diff --git a/sql/sql_class.h b/sql/sql_class.h index 038e1648596..3cc114bd366 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -155,6 +155,7 @@ extern bool volatile shutdown_in_progress; extern "C" LEX_STRING * thd_query_string (MYSQL_THD thd); extern "C" size_t thd_query_safe(MYSQL_THD thd, char *buf, size_t buflen); +void thd_vers_update_trt(THD *thd, bool value); /** @class CSET_STRING @@ -702,6 +703,12 @@ typedef struct system_variables uint column_compression_threshold; uint column_compression_zlib_level; ulong in_subquery_conversion_threshold; + + st_vers_asof_timestamp vers_asof_timestamp; + my_bool vers_force; + ulong vers_hide; + my_bool vers_innodb_algorithm_simple; + ulong vers_alter_history; } SV; /** @@ -942,6 +949,11 @@ public: enum_state state; +protected: + friend class sp_head; + bool is_stored_procedure; + +public: /* We build without RTTI, so dynamic_cast can't be used. */ enum Type { @@ -949,7 +961,8 @@ public: }; Query_arena(MEM_ROOT *mem_root_arg, enum enum_state state_arg) : - free_list(0), mem_root(mem_root_arg), state(state_arg) + free_list(0), mem_root(mem_root_arg), state(state_arg), + is_stored_procedure(state_arg == STMT_INITIALIZED_FOR_SP ? true : false) { INIT_ARENA_DBUG_INFO; } /* This constructor is used only when Query_arena is created as @@ -969,6 +982,8 @@ public: { return state == STMT_PREPARED || state == STMT_EXECUTED; } inline bool is_conventional() const { return state == STMT_CONVENTIONAL_EXECUTION; } + inline bool is_sp_execute() const + { return is_stored_procedure; } inline void* alloc(size_t size) { return alloc_root(mem_root,size); } inline void* calloc(size_t size) @@ -1013,6 +1028,22 @@ public: }; +class Query_arena_stmt +{ + THD *thd; + Query_arena backup; + Query_arena *arena; + +public: + Query_arena_stmt(THD *_thd); + ~Query_arena_stmt(); + bool arena_replaced() + { + return arena != NULL; + } +}; + + class Server_side_cursor; /** @@ -3287,11 +3318,38 @@ public: inline my_time_t query_start() { query_start_used=1; return start_time; } inline ulong query_start_sec_part() { query_start_sec_part_used=1; return start_time_sec_part; } + MYSQL_TIME query_start_TIME(); + + void start_time_inc() + { + ++start_time_sec_part; + if (start_time_sec_part == HRTIME_RESOLUTION) + { + ++start_time; + start_time_sec_part= 0; + } + } + + bool start_time_ge(my_time_t secs, ulong usecs) + { + return (start_time == secs && start_time_sec_part >= usecs) || + start_time > secs; + } + inline void set_current_time() { my_hrtime_t hrtime= my_hrtime(); - start_time= hrtime_to_my_time(hrtime); - start_time_sec_part= hrtime_sec_part(hrtime); + my_time_t secs= hrtime_to_my_time(hrtime); + ulong usecs= hrtime_sec_part(hrtime); + if (start_time_ge(secs, usecs)) + { + start_time_inc(); + } + else + { + start_time= secs; + start_time_sec_part= usecs; + } #ifdef HAVE_PSI_THREAD_INTERFACE PSI_THREAD_CALL(set_thread_start_time)(start_time); #endif @@ -4083,7 +4141,12 @@ public: Lex_input_stream *lip= &m_parser_state->m_lip; if (!yytext) { - if (!(yytext= lip->get_tok_start())) + if (lip->lookahead_token >= 0) + yytext= lip->get_tok_start_prev(); + else + yytext= lip->get_tok_start(); + + if (!yytext) yytext= ""; } /* Push an error into the error stack */ @@ -4501,6 +4564,7 @@ public: /* Handling of timeouts for commands */ thr_timer_t query_timer; + public: void set_query_timer() { @@ -5721,6 +5785,12 @@ class multi_update :public select_result_interceptor /* Need this to protect against multiple prepare() calls */ bool prepared; + + // For System Versioning (may need to insert new fields to a table). + ha_rows updated_sys_ver; + + bool has_vers_fields; + public: multi_update(THD *thd_arg, TABLE_LIST *ut, List<TABLE_LIST> *leaves_list, List<Item> *fields, List<Item> *values, @@ -6300,6 +6370,24 @@ public: void dbug_serve_apcs(THD *thd, int n_calls); #endif -#endif /* MYSQL_SERVER */ +class ScopedStatementReplication +{ +public: + ScopedStatementReplication(THD *thd) : thd(thd) + { + if (thd) + saved_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + } + ~ScopedStatementReplication() + { + if (thd) + thd->restore_stmt_binlog_format(saved_binlog_format); + } +private: + enum_binlog_format saved_binlog_format; + THD *thd; +}; + +#endif /* MYSQL_SERVER */ #endif /* SQL_CLASS_INCLUDED */ diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 984a0799b54..2ea2af73743 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -225,6 +225,18 @@ bool Update_plan::save_explain_data_intern(MEM_ROOT *mem_root, static bool record_should_be_deleted(THD *thd, TABLE *table, SQL_SELECT *sel, Explain_delete *explain) { + if (table->versioned()) + { + bool row_is_alive= table->vers_end_field()->is_max(); + /* If we are doing TRUNCATE TABLE with SYSTEM_TIME condition then historical + record is deleted and current record is kept. Otherwise alive record is + deleted and historical record is kept. */ + if ((thd->lex->sql_command == SQLCOM_TRUNCATE && table->pos_in_table_list->vers_conditions) + ? row_is_alive + : !row_is_alive) + return false; + } + explain->tracker.on_record_read(); thd->inc_examined_row_count(1); if (table->vfield) @@ -238,6 +250,18 @@ static bool record_should_be_deleted(THD *thd, TABLE *table, SQL_SELECT *sel, } +inline +int TABLE::delete_row() +{ + if (!versioned_by_sql() || !vers_end_field()->is_max()) + return file->ha_delete_row(record[0]); + + store_record(this, record[1]); + vers_end_field()->set_time(); + return file->ha_update_row(record[1], record[0]); +} + + /** Implement DELETE SQL word. @@ -284,6 +308,24 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, DBUG_RETURN(TRUE); THD_STAGE_INFO(thd, stage_init_update); + + bool truncate_history= table_list->vers_conditions; + if (truncate_history) + { + TABLE *table= table_list->table; + DBUG_ASSERT(table); + + DBUG_ASSERT(!conds); + if (select_lex->vers_setup_conds(thd, table_list, &conds)) + DBUG_RETURN(TRUE); + + // trx_sees() in InnoDB reads sys_trx_start + if (!table->versioned_by_sql()) { + DBUG_ASSERT(table_list->vers_conditions.type == FOR_SYSTEM_TIME_BEFORE); + bitmap_set_bit(table->read_set, table->vers_end_field()->field_index); + } + } + if (mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT)) DBUG_RETURN(TRUE); if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE)) @@ -378,7 +420,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, table->triggers->has_delete_triggers()); if (!with_select && !using_limit && const_cond_result && (!thd->is_current_stmt_binlog_format_row() && - !has_triggers)) + !has_triggers) + && !table->versioned_by_sql()) { /* Update the table->file->stats.records number */ table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); @@ -680,7 +723,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, delete_record= record_should_be_deleted(thd, table, select, explain); if (delete_record) { - if (table->triggers && + if (!truncate_history && table->triggers && table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_BEFORE, FALSE)) { @@ -694,10 +737,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, break; } - if (!(error= table->file->ha_delete_row(table->record[0]))) + error= table->delete_row(); + if (!error) { deleted++; - if (table->triggers && + if (!truncate_history && table->triggers && table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_AFTER, FALSE)) { @@ -784,6 +828,8 @@ cleanup: else errcode= query_error_code(thd, killed_status == NOT_KILLED); + ScopedStatementReplication scoped_stmt_rpl( + table->versioned_by_engine() ? thd : NULL); /* [binlog]: If 'handler::delete_all_rows()' was called and the storage engine does not inject the rows itself, we replicate @@ -1169,6 +1215,11 @@ int multi_delete::send_data(List<Item> &values) if (table->status & (STATUS_NULL_ROW | STATUS_DELETED)) continue; + if (table->versioned() && !table->vers_end_field()->is_max()) + { + continue; + } + table->file->position(table->record[0]); found++; @@ -1181,7 +1232,9 @@ int multi_delete::send_data(List<Item> &values) TRG_ACTION_BEFORE, FALSE)) DBUG_RETURN(1); table->status|= STATUS_DELETED; - if (!(error=table->file->ha_delete_row(table->record[0]))) + + error= table->delete_row(); + if (!error) { deleted++; if (!table->file->has_transactions()) @@ -1360,8 +1413,8 @@ int multi_delete::do_table_deletes(TABLE *table, SORT_INFO *sort_info, local_error= 1; break; } - - local_error= table->file->ha_delete_row(table->record[0]); + + local_error= table->delete_row(); if (local_error && !ignore) { table->file->print_error(local_error, MYF(0)); diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 0d237b94af2..af3add1a5b2 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -734,6 +734,127 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived) cursor= cursor->next_local) cursor->outer_join|= JOIN_TYPE_OUTER; } + + // System Versioning: fix system fields of versioned derived table + if ((thd->stmt_arena->is_stmt_prepare() || !thd->stmt_arena->is_stmt_execute()) + && sl->table_list.elements > 0) + { + // Similar logic as in mysql_create_view() + // Leading versioning table detected implicitly (first one selected) + TABLE_LIST *impli_table= NULL; + // Leading versioning table specified explicitly + // (i.e. if at least one system field is selected) + TABLE_LIST *expli_table= NULL; + LEX_CSTRING impli_start, impli_end; + Item_field *expli_start= NULL, *expli_end= NULL; + + for (TABLE_LIST *table= sl->table_list.first; table; table= table->next_local) + { + if (!table->table || !table->table->versioned()) + continue; + + const LString_i table_start= table->table->vers_start_field()->field_name; + const LString_i table_end= table->table->vers_end_field()->field_name; + if (!impli_table) + { + impli_table= table; + impli_start= table_start; + impli_end= table_end; + } + + /* Implicitly add versioning fields if needed */ + Item *item; + List_iterator_fast<Item> it(sl->item_list); + + DBUG_ASSERT(table->alias); + while ((item= it++)) + { + if (item->real_item()->type() != Item::FIELD_ITEM) + continue; + Item_field *fld= (Item_field*) (item->real_item()); + if (fld->table_name && 0 != my_strcasecmp(table_alias_charset, table->alias, fld->table_name)) + continue; + DBUG_ASSERT(fld->field_name.str); + if (table_start == fld->field_name) + { + if (expli_start) + { + my_printf_error( + ER_VERS_DERIVED_PROHIBITED, + "Derived table is prohibited: multiple start system fields `%s.%s`, `%s.%s` in query!", MYF(0), + expli_table->alias, + expli_start->field_name.str, + table->alias, + fld->field_name.str); + res= true; + goto exit; + } + if (expli_table) + { + if (expli_table != table) + { +expli_table_err: + my_printf_error( + ER_VERS_DERIVED_PROHIBITED, + "Derived table is prohibited: system fields from multiple tables `%s`, `%s` in query!", MYF(0), + expli_table->alias, + table->alias); + res= true; + goto exit; + } + } + else + expli_table= table; + expli_start= fld; + impli_end= table_end; + } + else if (table_end == fld->field_name) + { + if (expli_end) + { + my_printf_error( + ER_VERS_DERIVED_PROHIBITED, + "Derived table is prohibited: multiple end system fields `%s.%s`, `%s.%s` in query!", MYF(0), + expli_table->alias, + expli_end->field_name.str, + table->alias, + fld->field_name.str); + res= true; + goto exit; + } + if (expli_table) + { + if (expli_table != table) + goto expli_table_err; + } + else + expli_table= table; + expli_end= fld; + impli_start= table_start; + } + } // while ((item= it++)) + } // for (TABLE_LIST *table) + + if (expli_table) + impli_table= expli_table; + + if (impli_table) + { + Query_arena_stmt on_stmt_arena(thd); + if (!expli_start && (res= sl->vers_push_field(thd, impli_table, impli_start))) + goto exit; + if (!expli_end && (res= sl->vers_push_field(thd, impli_table, impli_end))) + goto exit; + + if (impli_table->vers_conditions) + { + sl->vers_export_outer= impli_table->vers_conditions; + } + else + sl->vers_import_outer= true; // FIXME: is needed? + } + } // if (sl->table_list.elements > 0) + // System Versioning end } unit->derived= derived; diff --git a/sql/sql_error.h b/sql/sql_error.h index f8b8adc805a..b89af72cc44 100644 --- a/sql/sql_error.h +++ b/sql/sql_error.h @@ -1174,7 +1174,7 @@ public: void copy_non_errors_from_wi(THD *thd, const Warning_info *src_wi); -private: +protected: Warning_info *get_warning_info() { return m_wi_stack.front(); } const Warning_info *get_warning_info() const { return m_wi_stack.front(); } diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 627e349c50b..950635d74e3 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -222,7 +222,7 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list, table_list->view_db.str, table_list->view_name.str); DBUG_RETURN(-1); } - if (values.elements != table->s->fields) + if (values.elements != table->vers_user_fields()) { my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), 1L); DBUG_RETURN(-1); @@ -676,6 +676,23 @@ static void save_insert_query_plan(THD* thd, TABLE_LIST *table_list) } +Field **TABLE::field_to_fill() +{ + return triggers && triggers->nullable_fields() ? triggers->nullable_fields() : field; +} + + +inline +Field **TABLE::vers_user_field_to_fill() +{ + if (versioned()) + { + return triggers && triggers->vers_user_fields() ? triggers->vers_user_fields() : vers_user_field; + } + return field_to_fill(); +} + + /** INSERT statement implementation @@ -1002,7 +1019,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, } table->reset_default_fields(); if (fill_record_n_invoke_before_triggers(thd, table, - table->field_to_fill(), + table->vers_user_field_to_fill(), *values, 0, TRG_EVENT_INSERT)) { if (values_list.elements != 1 && ! thd->is_error()) @@ -1033,6 +1050,9 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, } } + if (table->versioned()) + table->vers_update_fields(); + if ((res= table_list->view_check_option(thd, (values_list.elements == 1 ? 0 : @@ -1138,8 +1158,10 @@ values_loop_end: } else errcode= query_error_code(thd, thd->killed == NOT_KILLED); - - /* bug#22725: + + ScopedStatementReplication scoped_stmt_rpl( + table->versioned_by_engine() ? thd : NULL); + /* bug#22725: A query which per-row-loop can not be interrupted with KILLED, like INSERT, and that does not invoke stored @@ -1559,6 +1581,13 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, if (!table) table= table_list->table; + if (table->versioned_by_sql() && duplic == DUP_REPLACE) + { + // Additional memory may be required to create historical items. + if (table_list->set_insert_values(thd->mem_root)) + DBUG_RETURN(TRUE); + } + if (!select_insert) { Item *fake_conds= 0; @@ -1610,6 +1639,25 @@ static int last_uniq_key(TABLE *table,uint keynr) /* + Inserts one historical row to a table. + + Copies content of the row from table->record[1] to table->record[0], + sets Sys_end to now() and calls ha_write_row() . +*/ + +int vers_insert_history_row(TABLE *table) +{ + DBUG_ASSERT(table->versioned_by_sql()); + restore_record(table,record[1]); + + // Set Sys_end to now() + if (table->vers_end_field()->set_time()) + DBUG_ASSERT(0); + + return table->file->ha_write_row(table->record[0]); +} + +/* Write a record to table with optional deleting of conflicting records, invoke proper triggers if needed. @@ -1813,7 +1861,26 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) } if (error != HA_ERR_RECORD_IS_THE_SAME) + { info->updated++; + if (table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if ((error= vers_insert_history_row(table))) + { + info->last_errno= error; + table->file->print_error(error, MYF(0)); + trg_error= 1; + restore_record(table, record[2]); + goto ok_or_after_trg_err; + } + restore_record(table, record[2]); + } + info->copied++; + } + } else error= 0; /* @@ -1865,17 +1932,35 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) tables which have ON UPDATE but have no ON DELETE triggers, we just should not expose this fact to users by invoking ON UPDATE triggers. - */ - if (last_uniq_key(table,key_nr) && - !table->file->referenced_by_foreign_key() && + For system versioning wa also use path through delete since we would + save nothing through this cheating. + */ + if (last_uniq_key(table,key_nr) && + !table->file->referenced_by_foreign_key() && (!table->triggers || !table->triggers->has_delete_triggers())) { + if (table->versioned_by_engine()) + { + bitmap_set_bit(table->write_set, table->vers_start_field()->field_index); + table->vers_start_field()->set_notnull(); + table->vers_start_field()->store(0, false); + } if ((error=table->file->ha_update_row(table->record[1], - table->record[0])) && + table->record[0])) && error != HA_ERR_RECORD_IS_THE_SAME) goto err; if (error != HA_ERR_RECORD_IS_THE_SAME) + { info->deleted++; + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + error= vers_insert_history_row(table); + restore_record(table, record[2]); + if (error) + goto err; + } + } else error= 0; thd->record_first_successful_insert_id_in_cur_stmt(table->file->insert_id_for_cur_row); @@ -1891,9 +1976,29 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info) table->triggers->process_triggers(thd, TRG_EVENT_DELETE, TRG_ACTION_BEFORE, TRUE)) goto before_trg_err; - if ((error=table->file->ha_delete_row(table->record[1]))) + + if (!table->versioned_by_sql()) + error= table->file->ha_delete_row(table->record[1]); + else + { + DBUG_ASSERT(table->insert_values); + store_record(table,insert_values); + restore_record(table,record[1]); + if (table->vers_end_field()->set_time()) + { + error= 1; + goto err; + } + error= table->file->ha_update_row(table->record[1], + table->record[0]); + restore_record(table,insert_values); + } + if (error) goto err; - info->deleted++; + if (!table->versioned_by_sql()) + info->deleted++; + else + info->updated++; if (!table->file->has_transactions()) thd->transaction.stmt.modified_non_trans_table= TRUE; if (table->triggers && @@ -1981,7 +2086,9 @@ int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, TABLE_LIST *t for (Field **field=entry->field ; *field ; field++) { if (!bitmap_is_set(write_set, (*field)->field_index) && - has_no_default_value(thd, *field, table_list)) + !(*field)->vers_sys_field() && + has_no_default_value(thd, *field, table_list) && + ((*field)->real_type() != MYSQL_TYPE_ENUM)) err=1; } return thd->abort_on_warning ? err : 0; @@ -2507,6 +2614,9 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd) } *field=0; + if (copy->versioned() && copy->vers_update_user_field(client_thd->mem_root)) + goto error; + if (share->virtual_fields || share->default_expressions || share->default_fields) { @@ -3736,6 +3846,8 @@ int select_insert::send_data(List<Item> &values) DBUG_RETURN(0); thd->count_cuted_fields= CHECK_FIELD_WARN; // Calculate cuted fields + if (table->versioned()) + table->vers_update_fields(); store_values(values); if (table->default_field && table->update_default_fields(0, info.ignore)) DBUG_RETURN(1); @@ -3794,12 +3906,16 @@ int select_insert::send_data(List<Item> &values) void select_insert::store_values(List<Item> &values) { + DBUG_ENTER("select_insert::store_values"); + if (fields->elements) fill_record_n_invoke_before_triggers(thd, table, *fields, values, 1, TRG_EVENT_INSERT); else - fill_record_n_invoke_before_triggers(thd, table, table->field_to_fill(), + fill_record_n_invoke_before_triggers(thd, table, table->vers_user_field_to_fill(), values, 1, TRG_EVENT_INSERT); + + DBUG_VOID_RETURN; } bool select_insert::prepare_eof() @@ -4089,6 +4205,12 @@ static TABLE *create_table_from_items(THD *thd, alter_info->create_list.push_back(cr_field, thd->mem_root); } + if (create_info->vers_info.check_and_fix_implicit( + thd, alter_info, create_info, create_table->table_name)) + { + DBUG_RETURN(NULL); + } + DEBUG_SYNC(thd,"create_table_select_before_create"); /* Check if LOCK TABLES + CREATE OR REPLACE of existing normal table*/ diff --git a/sql/sql_insert.h b/sql/sql_insert.h index aea0dac6b0d..6efd680d188 100644 --- a/sql/sql_insert.h +++ b/sql/sql_insert.h @@ -37,6 +37,7 @@ void upgrade_lock_type_for_insert(THD *thd, thr_lock_type *lock_type, bool is_multi_insert); int check_that_all_fields_are_given_values(THD *thd, TABLE *entry, TABLE_LIST *table_list); +int vers_insert_history_row(TABLE *table); int write_record(THD *thd, TABLE *table, COPY_INFO *info); void kill_delayed_threads(void); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 6cb065c3985..db22669d287 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -33,9 +33,9 @@ #include "sql_signal.h" -void LEX::parse_error() +void LEX::parse_error(uint err_number) { - thd->parse_error(); + thd->parse_error(err_number); } @@ -771,6 +771,8 @@ void LEX::start(THD *thd_arg) frame_bottom_bound= NULL; win_spec= NULL; + vers_conditions.empty(); + is_lex_started= TRUE; DBUG_VOID_RETURN; } @@ -1340,6 +1342,8 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) return WITH_CUBE_SYM; case ROLLUP_SYM: return WITH_ROLLUP_SYM; + case SYSTEM: + return WITH_SYSTEM_SYM; default: /* Save the token following 'WITH' @@ -1350,6 +1354,27 @@ int MYSQLlex(YYSTYPE *yylval, THD *thd) return WITH; } break; + case FOR_SYM: + /* + * Additional look-ahead to resolve doubtful cases like: + * SELECT ... FOR UPDATE + * SELECT ... FOR SYSTEM_TIME ... . + */ + token= lex_one_token(yylval, thd); + lip->add_digest_token(token, yylval); + switch(token) { + case SYSTEM_TIME_SYM: + return FOR_SYSTEM_TIME_SYM; + default: + /* + Save the token following 'FOR_SYM' + */ + lip->lookahead_yylval= lip->yylval; + lip->yylval= NULL; + lip->lookahead_token= token; + return FOR_SYM; + } + break; case VALUES: if (thd->lex->current_select->parsing_place == IN_UPDATE_ON_DUP_KEY || thd->lex->current_select->parsing_place == IN_PART_FUNC) @@ -2286,6 +2311,10 @@ void st_select_lex::init_select() in_funcs.empty(); curr_tvc_name= 0; in_tvc= false; + vers_saved_where= NULL; + vers_export_outer.empty(); + vers_import_outer= false; + versioned_tables= 0; } /* @@ -3022,8 +3051,7 @@ void Query_tables_list::destroy_query_tables_list() */ LEX::LEX() - : explain(NULL), - result(0), arena_for_set_stmt(0), mem_root_for_set_stmt(0), + : explain(NULL), result(0), arena_for_set_stmt(0), mem_root_for_set_stmt(0), option_type(OPT_DEFAULT), context_analysis_only(0), sphead(0), is_lex_started(0), limit_rows_examined_cnt(ULONGLONG_MAX) { @@ -3753,6 +3781,7 @@ void st_select_lex::fix_prepare_information(THD *thd, Item **conds, DBUG_ENTER("st_select_lex::fix_prepare_information"); if (!thd->stmt_arena->is_conventional() && first_execution) { + Query_arena_stmt on_stmt_arena(thd); first_execution= 0; if (group_list.first) { @@ -7306,6 +7335,20 @@ int set_statement_var_if_exists(THD *thd, const char *var_name, } +Query_tables_backup::Query_tables_backup(THD* _thd) : + thd(_thd) +{ + thd->lex->reset_n_backup_query_tables_list(&backup); + thd->lex->sql_command= backup.sql_command; +} + + +Query_tables_backup::~Query_tables_backup() +{ + thd->lex->restore_backup_query_tables_list(&backup); +} + + bool LEX::sp_add_cfetch(THD *thd, const LEX_CSTRING *name) { uint offset; @@ -7437,3 +7480,26 @@ Item *LEX::make_item_func_replace(THD *thd, new (thd->mem_root) Item_func_replace_oracle(thd, org, find, replace) : new (thd->mem_root) Item_func_replace(thd, org, find, replace); } + + +bool SELECT_LEX::vers_push_field(THD *thd, TABLE_LIST *table, const LEX_CSTRING field_name) +{ + DBUG_ASSERT(field_name.str); + Item_field *fld= new (thd->mem_root) Item_field(thd, &context, + table->db, table->alias, &field_name); + if (!fld) + return true; + + item_list.push_back(fld); + + if (thd->lex->view_list.elements) + { + if (LEX_STRING *l= thd->make_lex_string(field_name.str, field_name.length)) + thd->lex->view_list.push_back(l); + else + return true; + } + + return false; +} + diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 069b75628e3..94fdc6442ab 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -582,6 +582,10 @@ public: */ uint8 uncacheable; enum sub_select_type linkage; + bool is_linkage_set() const + { + return linkage == UNION_TYPE || linkage == INTERSECT_TYPE || linkage == EXCEPT_TYPE; + } bool no_table_names_allowed; /* used for global order by */ static void *operator new(size_t size, MEM_ROOT *mem_root) throw () @@ -1040,6 +1044,17 @@ public: table_value_constr *tvc; bool in_tvc; + /** System Versioning */ +private: + Item *vers_saved_where; +public: + vers_select_conds_t vers_export_outer; + bool vers_import_outer; + uint versioned_tables; + int vers_setup_conds(THD *thd, TABLE_LIST *tables, COND **where_expr); + /* push new Item_field into item_list */ + bool vers_push_field(THD *thd, TABLE_LIST *table, const LEX_CSTRING field_name); + void init_query(); void init_select(); st_select_lex_unit* master_unit() { return (st_select_lex_unit*) master; } @@ -1974,6 +1989,18 @@ private: }; +class Query_tables_backup +{ + THD *thd; + Query_tables_list backup; + +public: + Query_tables_backup(THD *_thd); + ~Query_tables_backup(); + const Query_tables_list& get() const { return backup; } +}; + + /* st_parsing_options contains the flags for constructions that are allowed in the current statement. @@ -2710,7 +2737,6 @@ struct LEX: public Query_tables_list private: Query_arena_memroot *arena_for_set_stmt; MEM_ROOT *mem_root_for_set_stmt; - void parse_error(); bool sp_block_finalize(THD *thd, const Lex_spblock_st spblock, class sp_label **splabel); bool sp_change_context(THD *thd, const sp_pcontext *ctx, bool exclusive); @@ -2724,6 +2750,7 @@ private: bool sp_for_loop_increment(THD *thd, const Lex_for_loop_st &loop); public: + void parse_error(uint err_number= ER_SYNTAX_ERROR); inline bool is_arena_for_set_stmt() {return arena_for_set_stmt != 0;} bool set_arena_for_set_stmt(Query_arena *backup); void reset_arena_for_set_stmt(Query_arena *backup); @@ -2981,6 +3008,9 @@ public: Window_frame_bound *frame_bottom_bound; Window_spec *win_spec; + /* System Versioning */ + vers_select_conds_t vers_conditions; + inline void free_set_stmt_mem_root() { DBUG_ASSERT(!is_arena_for_set_stmt()); @@ -3685,6 +3715,11 @@ public: bool add_grant_command(THD *thd, enum_sql_command sql_command_arg, stored_procedure_type type_arg); + + Vers_parse_info &vers_get_info() + { + return create_info.vers_info; + } }; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 346d1c0f18f..a255c477cee 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -112,6 +112,7 @@ #include "wsrep_mysqld.h" #include "wsrep_thd.h" +#include "vtmd.h" static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, Parser_state *parser_state, @@ -3165,6 +3166,11 @@ bool Sql_cmd_call::execute(THD *thd) if (do_execute_sp(thd, sp)) return true; + if (sp->sp_cache_version() == ULONG_MAX) + { + sp_cache_flush(thd->sp_proc_cache, sp); + } + /* Disable slow log for the above call(), if calls are disabled. Instead we will log the executed statements to the slow log. @@ -4023,7 +4029,6 @@ mysql_execute_command(THD *thd) copy. */ Alter_info alter_info(lex->alter_info, thd->mem_root); - if (thd->is_fatal_error) { /* If out of memory when creating a copy of alter_info. */ @@ -4051,6 +4056,7 @@ mysql_execute_command(THD *thd) */ if (!(create_info.used_fields & HA_CREATE_USED_ENGINE)) create_info.use_default_db_type(thd); + /* If we are using SET CHARSET without DEFAULT, add an implicit DEFAULT to not confuse old users. (This may change). @@ -4237,6 +4243,11 @@ mysql_execute_command(THD *thd) } else { + if (create_info.vers_info.check_and_fix_implicit( + thd, &alter_info, &create_info, create_table->table_name)) + { + goto end_with_restore_list; + } /* In STATEMENT format, we probably have to replicate also temporary tables, like mysql replication does. Also check if the requested @@ -6421,6 +6432,21 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) if (check_dependencies_in_with_clauses(lex->with_clauses_list)) return 1; + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_local) + { + if (table->vers_conditions) + { + VTMD_exists vtmd(*table); + if (vtmd.check_exists(thd)) + return 1; + if (vtmd.exists && vtmd.setup_select(thd)) + return 1; + } + } + } + if (!(res= open_and_lock_tables(thd, all_tables, TRUE, 0))) { if (lex->describe) diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index fadd7009822..55d906bb3b4 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -67,6 +67,7 @@ #include "opt_range.h" // store_key_image_to_rec #include "sql_alter.h" // Alter_table_ctx #include "sql_select.h" +#include "sql_tablespace.h" // check_tablespace_name #include <algorithm> using std::max; @@ -87,6 +88,7 @@ static int get_partition_id_list_col(partition_info *, uint32 *, longlong *); static int get_partition_id_list(partition_info *, uint32 *, longlong *); static int get_partition_id_range_col(partition_info *, uint32 *, longlong *); static int get_partition_id_range(partition_info *, uint32 *, longlong *); +static int vers_get_partition_id(partition_info *, uint32 *, longlong *); static int get_part_id_charset_func_part(partition_info *, uint32 *, longlong *); static int get_part_id_charset_func_subpart(partition_info *, uint32 *); static int get_partition_id_hash_nosub(partition_info *, uint32 *, longlong *); @@ -1295,6 +1297,24 @@ static void set_up_partition_func_pointers(partition_info *part_info) part_info->get_subpartition_id= get_partition_id_hash_sub; } } + else if (part_info->part_type == VERSIONING_PARTITION) + { + part_info->get_part_partition_id= vers_get_partition_id; + if (part_info->list_of_subpart_fields) + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_key_sub; + else + part_info->get_subpartition_id= get_partition_id_key_sub; + } + else + { + if (part_info->linear_hash_ind) + part_info->get_subpartition_id= get_partition_id_linear_hash_sub; + else + part_info->get_subpartition_id= get_partition_id_hash_sub; + } + } else /* LIST Partitioning */ { if (part_info->column_list) @@ -1335,6 +1355,10 @@ static void set_up_partition_func_pointers(partition_info *part_info) else part_info->get_partition_id= get_partition_id_list; } + else if (part_info->part_type == VERSIONING_PARTITION) + { + part_info->get_partition_id= vers_get_partition_id; + } else /* HASH partitioning */ { if (part_info->list_of_part_fields) @@ -1607,6 +1631,7 @@ bool fix_partition_func(THD *thd, TABLE *table, } } DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION); + DBUG_ASSERT(part_info->part_type != VERSIONING_PARTITION || part_info->column_list); /* Partition is defined. We need to verify that partitioning function is correct. @@ -1639,6 +1664,9 @@ bool fix_partition_func(THD *thd, TABLE *table, const char *error_str; if (part_info->column_list) { + if (part_info->part_type == VERSIONING_PARTITION && + part_info->vers_setup_expression(thd)) + goto end; List_iterator<const char> it(part_info->part_field_list); if (unlikely(handle_list_of_fields(thd, it, table, part_info, FALSE))) goto end; @@ -1662,6 +1690,12 @@ bool fix_partition_func(THD *thd, TABLE *table, if (unlikely(part_info->check_list_constants(thd))) goto end; } + else if (part_info->part_type == VERSIONING_PARTITION) + { + error_str= "SYSTEM_TIME"; + if (unlikely(part_info->check_range_constants(thd))) + goto end; + } else { DBUG_ASSERT(0); @@ -2182,6 +2216,20 @@ static int add_partition_values(String *str, partition_info *part_info, } while (++i < num_items); err+= str->append(')'); } + else if (part_info->part_type == VERSIONING_PARTITION) + { + switch (p_elem->type()) + { + case partition_element::AS_OF_NOW: + err+= str->append(STRING_WITH_LEN(" AS OF CURRENT_TIMESTAMP")); + break; + case partition_element::VERSIONING: + err+= str->append(STRING_WITH_LEN(" VERSIONING")); + break; + default: + DBUG_ASSERT(0 && "wrong p_elem->type"); + } + } end: return err; } @@ -2275,13 +2323,32 @@ char *generate_partition_syntax(THD *thd, partition_info *part_info, else err+= str.append(STRING_WITH_LEN("HASH ")); break; + case VERSIONING_PARTITION: + err+= str.append(STRING_WITH_LEN("SYSTEM_TIME ")); + break; default: DBUG_ASSERT(0); /* We really shouldn't get here, no use in continuing from here */ my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR)); DBUG_RETURN(NULL); } - if (part_info->part_expr) + if (part_info->part_type == VERSIONING_PARTITION) + { + Vers_part_info *vers_info= part_info->vers_info; + DBUG_ASSERT(vers_info); + if (vers_info->interval) + { + err+= str.append(STRING_WITH_LEN("INTERVAL ")); + err+= str.append_ulonglong(vers_info->interval); + err+= str.append(STRING_WITH_LEN(" SECOND ")); + } + if (vers_info->limit) + { + err+= str.append(STRING_WITH_LEN("LIMIT ")); + err+= str.append_ulonglong(vers_info->limit); + } + } + else if (part_info->part_expr) { err+= str.append('('); part_info->part_expr->print_for_table_def(&str); @@ -3088,6 +3155,72 @@ int get_partition_id_range_col(partition_info *part_info, } +int vers_get_partition_id(partition_info *part_info, + uint32 *part_id, + longlong *func_value) +{ + DBUG_ENTER("vers_get_partition_id"); + DBUG_ASSERT(part_info); + Field *sys_trx_end= part_info->part_field_array[STAT_TRX_END]; + DBUG_ASSERT(sys_trx_end); + DBUG_ASSERT(part_info->table); + Vers_part_info *vers_info= part_info->vers_info; + DBUG_ASSERT(vers_info && vers_info->initialized()); + DBUG_ASSERT(sys_trx_end->table == part_info->table && part_info->table->versioned()); + DBUG_ASSERT(part_info->table->vers_end_field() == sys_trx_end); + + // new rows have NULL in sys_trx_end + if (sys_trx_end->is_max() || sys_trx_end->is_null()) + { + *part_id= vers_info->now_part->id; + } + else // row is historical + { + THD *thd= current_thd; + TABLE *table= part_info->table; + + switch (thd->lex->sql_command) + { + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_ALTER_TABLE: + mysql_mutex_lock(&table->s->LOCK_rotation); + if (table->s->busy_rotation) + { + table->s->vers_wait_rotation(); + part_info->vers_hist_part(); + } + else + { + table->s->busy_rotation= true; + mysql_mutex_unlock(&table->s->LOCK_rotation); + // transaction is not yet pushed to VTQ, so we use now-time + ulong sec_part; + my_time_t end_ts= sys_trx_end->table->versioned_by_engine() ? + my_time_t(0) : sys_trx_end->get_timestamp(&sec_part); + if (part_info->vers_limit_exceed() || part_info->vers_interval_exceed(end_ts)) + { + part_info->vers_part_rotate(thd); + } + mysql_mutex_lock(&table->s->LOCK_rotation); + mysql_cond_broadcast(&table->s->COND_rotation); + table->s->busy_rotation= false; + } + mysql_mutex_unlock(&table->s->LOCK_rotation); + break; + default: + ; + } + *part_id= vers_info->hist_part->id; + } + + DBUG_PRINT("exit",("partition: %d", *part_id)); + DBUG_RETURN(0); +} + + int get_partition_id_range(partition_info *part_info, uint32 *part_id, longlong *func_value) @@ -4654,7 +4787,8 @@ uint prep_alter_part_table(THD *thd, TABLE *table, Alter_info *alter_info, must know the number of new partitions in this case. */ if (thd->lex->no_write_to_binlog && - tab_part_info->part_type != HASH_PARTITION) + tab_part_info->part_type != HASH_PARTITION && + tab_part_info->part_type != VERSIONING_PARTITION) { my_error(ER_NO_BINLOG_ERROR, MYF(0)); goto err; @@ -4859,6 +4993,21 @@ that are reorganised. partition configuration is made. */ { + partition_element *now_part= NULL; + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + List_iterator<partition_element> it(tab_part_info->partitions); + partition_element *el; + while ((el= it++)) + { + if (el->type() == partition_element::AS_OF_NOW) + { + DBUG_ASSERT(tab_part_info->vers_info && el == tab_part_info->vers_info->now_part); + it.remove(); + now_part= el; + } + } + } List_iterator<partition_element> alt_it(alt_part_info->partitions); uint part_count= 0; do @@ -4873,6 +5022,15 @@ that are reorganised. } } while (++part_count < num_new_partitions); tab_part_info->num_parts+= num_new_partitions; + if (tab_part_info->part_type == VERSIONING_PARTITION) + { + DBUG_ASSERT(now_part); + if (tab_part_info->partitions.push_back(now_part, thd->mem_root)) + { + mem_alloc_error(1); + goto err; + } + } } /* If we specify partitions explicitly we don't use defaults anymore. @@ -4906,16 +5064,28 @@ that are reorganised. List_iterator<partition_element> part_it(tab_part_info->partitions); tab_part_info->is_auto_partitioned= FALSE; - if (!(tab_part_info->part_type == RANGE_PARTITION || - tab_part_info->part_type == LIST_PARTITION)) + if (tab_part_info->part_type == VERSIONING_PARTITION) { - my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP"); - goto err; + if (num_parts_dropped >= tab_part_info->num_parts - 1) + { + DBUG_ASSERT(table && table->s && table->s->table_name.str); + my_error(ER_VERS_WRONG_PARTS, MYF(0), table->s->table_name.str); + goto err; + } } - if (num_parts_dropped >= tab_part_info->num_parts) + else { - my_error(ER_DROP_LAST_PARTITION, MYF(0)); - goto err; + if (!(tab_part_info->part_type == RANGE_PARTITION || + tab_part_info->part_type == LIST_PARTITION)) + { + my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP"); + goto err; + } + if (num_parts_dropped >= tab_part_info->num_parts) + { + my_error(ER_DROP_LAST_PARTITION, MYF(0)); + goto err; + } } do { @@ -4923,6 +5093,13 @@ that are reorganised. if (is_name_in_list(part_elem->partition_name, alter_info->partition_names)) { + if (tab_part_info->part_type == VERSIONING_PARTITION && + part_elem->type() == partition_element::AS_OF_NOW) + { + DBUG_ASSERT(table && table->s && table->s->table_name.str); + my_error(ER_VERS_WRONG_PARTS, MYF(0), table->s->table_name.str); + goto err; + } /* Set state to indicate that the partition is to be dropped. */ @@ -5245,6 +5422,12 @@ the generated partition syntax in a correct manner. tab_part_info->use_default_subpartitions= FALSE; tab_part_info->use_default_num_subpartitions= FALSE; } + + if (alter_info->flags & Alter_info::ALTER_ADD_PARTITION && + tab_part_info->part_type == VERSIONING_PARTITION && + tab_part_info->vers_setup_expression(thd, alt_part_info->partitions.elements)) + goto err; + if (tab_part_info->check_partition_info(thd, (handlerton**)NULL, table->file, 0, TRUE)) { @@ -6916,6 +7099,39 @@ err: } #endif + +/* + Prepare for calling val_int on partition function by setting fields to + point to the record where the values of the PF-fields are stored. + + SYNOPSIS + set_field_ptr() + ptr Array of fields to change ptr + new_buf New record pointer + old_buf Old record pointer + + DESCRIPTION + Set ptr in field objects of field array to refer to new_buf record + instead of previously old_buf. Used before calling val_int and after + it is used to restore pointers to table->record[0]. + This routine is placed outside of partition code since it can be useful + also for other programs. +*/ + +void set_field_ptr(Field **ptr, const uchar *new_buf, + const uchar *old_buf) +{ + my_ptrdiff_t diff= (new_buf - old_buf); + DBUG_ENTER("set_field_ptr"); + + do + { + (*ptr)->move_field_offset(diff); + } while (*(++ptr)); + DBUG_VOID_RETURN; +} + + /* Prepare for calling val_int on partition function by setting fields to point to the record where the values of the PF-fields are stored. @@ -6954,6 +7170,61 @@ void set_key_field_ptr(KEY *key_info, const uchar *new_buf, } +/** + Append all fields in read_set to string + + @param[in,out] str String to append to. + @param[in] row Row to append. + @param[in] table Table containing read_set and fields for the row. +*/ +void append_row_to_str(String &str, const uchar *row, TABLE *table) +{ + Field **fields, **field_ptr; + const uchar *rec; + uint num_fields= bitmap_bits_set(table->read_set); + uint curr_field_index= 0; + bool is_rec0= !row || row == table->record[0]; + if (!row) + rec= table->record[0]; + else + rec= row; + + /* Create a new array of all read fields. */ + fields= (Field**) my_malloc(sizeof(void*) * (num_fields + 1), + MYF(0)); + if (!fields) + return; + fields[num_fields]= NULL; + for (field_ptr= table->field; + *field_ptr; + field_ptr++) + { + if (!bitmap_is_set(table->read_set, (*field_ptr)->field_index)) + continue; + fields[curr_field_index++]= *field_ptr; + } + + + if (!is_rec0) + set_field_ptr(fields, rec, table->record[0]); + + for (field_ptr= fields; + *field_ptr; + field_ptr++) + { + Field *field= *field_ptr; + str.append(" "); + str.append(field->field_name); + str.append(":"); + field_unpack(&str, field, rec, 0, false); + } + + if (!is_rec0) + set_field_ptr(fields, table->record[0], rec); + my_free(fields); +} + + /* SYNOPSIS mem_alloc_error() @@ -7100,6 +7371,7 @@ static void set_up_range_analysis_info(partition_info *part_info) switch (part_info->part_type) { case RANGE_PARTITION: case LIST_PARTITION: + case VERSIONING_PARTITION: if (!part_info->column_list) { if (part_info->part_expr->get_monotonicity_info() != NON_MONOTONIC) @@ -7400,7 +7672,7 @@ int get_part_iter_for_interval_cols_via_map(partition_info *part_info, uint full_length= 0; DBUG_ENTER("get_part_iter_for_interval_cols_via_map"); - if (part_info->part_type == RANGE_PARTITION) + if (part_info->part_type == RANGE_PARTITION || part_info->part_type == VERSIONING_PARTITION) { get_col_endpoint= get_partition_id_cols_range_for_endpoint; part_iter->get_next= get_next_partition_id_range; @@ -7446,7 +7718,7 @@ int get_part_iter_for_interval_cols_via_map(partition_info *part_info, } if (flags & NO_MAX_RANGE) { - if (part_info->part_type == RANGE_PARTITION) + if (part_info->part_type == RANGE_PARTITION || part_info->part_type == VERSIONING_PARTITION) part_iter->part_nums.end= part_info->num_parts; else /* LIST_PARTITION */ { @@ -8143,4 +8415,52 @@ uint get_partition_field_store_length(Field *field) store_length+= HA_KEY_BLOB_LENGTH; return store_length; } + +// FIXME: duplicate of ha_partition::set_up_table_before_create +bool set_up_table_before_create(THD *thd, + TABLE_SHARE *share, + const char *partition_name_with_path, + HA_CREATE_INFO *info, + partition_element *part_elem) +{ + bool error= false; + const char *partition_name; + DBUG_ENTER("set_up_table_before_create"); + + DBUG_ASSERT(part_elem); + + if (!part_elem) + DBUG_RETURN(true); + share->max_rows= part_elem->part_max_rows; + share->min_rows= part_elem->part_min_rows; + partition_name= strrchr(partition_name_with_path, FN_LIBCHAR); + if ((part_elem->index_file_name && + (error= append_file_to_dir(thd, + const_cast<const char**>(&part_elem->index_file_name), + partition_name+1))) || + (part_elem->data_file_name && + (error= append_file_to_dir(thd, + const_cast<const char**>(&part_elem->data_file_name), + partition_name+1)))) + { + DBUG_RETURN(error); + } + if (part_elem->index_file_name != NULL) + { + info->index_file_name= part_elem->index_file_name; + } + if (part_elem->data_file_name != NULL) + { + info->data_file_name= part_elem->data_file_name; + } + if (part_elem->tablespace_name != NULL) + { + if (check_tablespace_name(part_elem->tablespace_name) != IDENT_NAME_OK) + { + DBUG_RETURN(true); + } + info->tablespace= part_elem->tablespace_name; + } + DBUG_RETURN(error); +} #endif diff --git a/sql/sql_partition.h b/sql/sql_partition.h index 992229afb05..626ceee3f13 100644 --- a/sql/sql_partition.h +++ b/sql/sql_partition.h @@ -41,6 +41,7 @@ typedef struct st_key_range key_range; #define HA_CAN_UPDATE_PARTITION_KEY (1 << 1) #define HA_CAN_PARTITION_UNIQUE (1 << 2) #define HA_USE_AUTO_PARTITION (1 << 3) +#define HA_ONLY_VERS_PARTITION (1 << 4) #define NORMAL_PART_NAME 0 #define TEMP_PART_NAME 1 @@ -128,6 +129,14 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info, bool check_part_func_fields(Field **ptr, bool ok_with_charsets); bool field_is_partition_charset(Field *field); Item* convert_charset_partition_constant(Item *item, CHARSET_INFO *cs); +/** + Append all fields in read_set to string + + @param[in,out] str String to append to. + @param[in] row Row to append. + @param[in] table Table containing read_set and fields for the row. +*/ +void append_row_to_str(String &str, const uchar *row, TABLE *table); void mem_alloc_error(size_t size); void truncate_partition_filename(char *path); @@ -291,4 +300,30 @@ int __attribute__((warn_unused_result)) void set_key_field_ptr(KEY *key_info, const uchar *new_buf, const uchar *old_buf); +/** Set up table for creating a partition. +Copy info from partition to the table share so the created partition +has the correct info. + @param thd THD object + @param share Table share to be updated. + @param info Create info to be updated. + @param part_elem partition_element containing the info. + + @return status + @retval TRUE Error + @retval FALSE Success + + @details + Set up + 1) Comment on partition + 2) MAX_ROWS, MIN_ROWS on partition + 3) Index file name on partition + 4) Data file name on partition +*/ +bool set_up_table_before_create(THD *thd, + TABLE_SHARE *share, + const char *partition_name_with_path, + HA_CREATE_INFO *info, + partition_element *part_elem); + #endif /* SQL_PARTITION_INCLUDED */ + diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 61e312646da..31cc6bcdd31 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -30,6 +30,7 @@ #include "sql_base.h" // tdc_remove_table, lock_table_names, #include "sql_handler.h" // mysql_ha_rm_tables #include "sql_statistics.h" +#include "vtmd.h" static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error); @@ -299,12 +300,23 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const char *new_db, LEX_CSTRING new_db_name= { new_db, strlen(new_db)}; (void) rename_table_in_stat_tables(thd, &db_name, &table_name, &new_db_name, &new_table); - if ((rc= Table_triggers_list::change_table_name(thd, ren_table->db, - old_alias, - ren_table->table_name, - new_db, - new_alias))) + VTMD_rename vtmd(*ren_table); + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) { + rc= vtmd.try_rename(thd, new_db_name, new_table); + if (rc) + goto revert_table_name; + } + rc= Table_triggers_list::change_table_name(thd, ren_table->db, + old_alias, + ren_table->table_name, + new_db, + new_alias); + if (rc) + { + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + vtmd.revert_rename(thd, new_db_name); +revert_table_name: /* We've succeeded in renaming table's .frm and in updating corresponding handler data, but have failed to update table's diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 96aa227b439..e0e160b45aa 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -54,12 +54,14 @@ #include "sql_statistics.h" #include "sql_cte.h" #include "sql_window.h" +#include "tztime.h" #include "debug_sync.h" // DEBUG_SYNC #include <m_ctype.h> #include <my_bit.h> #include <hash.h> #include <ft_global.h> +#include "sys_vars_shared.h" /* A key part number that means we're using a fulltext scan. @@ -669,6 +671,359 @@ setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array, DBUG_RETURN(res); } +bool vers_select_conds_t::init_from_sysvar(THD *thd) +{ + st_vers_asof_timestamp &in= thd->variables.vers_asof_timestamp; + type= (vers_range_type_t) in.type; + unit_start= UNIT_TIMESTAMP; + if (type != FOR_SYSTEM_TIME_UNSPECIFIED && type != FOR_SYSTEM_TIME_ALL) + { + DBUG_ASSERT(type == FOR_SYSTEM_TIME_AS_OF); + start= new (thd->mem_root) + Item_datetime_literal(thd, &in.ltime, TIME_SECOND_PART_DIGITS); + if (!start) + return true; + } + else + start= NULL; + end= NULL; + return false; +} + +inline +void JOIN::vers_check_items() +{ + Item_transformer transformer= &Item::vers_transformer; + + if (conds) + { + conds= conds->transform(thd, transformer, NULL); + } + + for (ORDER *ord= order; ord; ord= ord->next) + { + ord->item_ptr= (*ord->item)->transform(thd, transformer, NULL); + *ord->item= ord->item_ptr; + } + + for (ORDER *ord= group_list; ord; ord= ord->next) + { + ord->item_ptr= (*ord->item)->transform(thd, transformer, NULL); + *ord->item= ord->item_ptr; + } + + if (having) + { + having= having->transform(thd, transformer, NULL); + } +} + +int SELECT_LEX::vers_setup_conds(THD *thd, TABLE_LIST *tables, COND **where_expr) +{ + DBUG_ENTER("SELECT_LEX::vers_setup_cond"); +#define newx new (thd->mem_root) + + TABLE_LIST *table; + + if (!thd->stmt_arena->is_conventional() && + !thd->stmt_arena->is_stmt_prepare() && !thd->stmt_arena->is_sp_execute()) + { + // statement is already prepared + DBUG_RETURN(0); + } + + if (!versioned_tables) + { + for (table= tables; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + versioned_tables++; + else if (table->vers_conditions) + { + my_error(ER_VERSIONING_REQUIRED, MYF(0), table->alias); + DBUG_RETURN(-1); + } + } + } + + if (versioned_tables == 0) + DBUG_RETURN(0); + + /* For prepared statements we create items on statement arena, + because they must outlive execution phase for multiple executions. */ + Query_arena_stmt on_stmt_arena(thd); + + if (vers_saved_where) + { + DBUG_ASSERT(thd->stmt_arena->is_sp_execute()); + /* 2. this copy_andor_structure() is also required by the same reason */ + *where_expr= vers_saved_where->copy_andor_structure(thd); + } + else if (thd->stmt_arena->is_sp_execute()) + { + if (thd->stmt_arena->is_stmt_execute()) // SP executed second time (STMT_EXECUTED) + *where_expr= 0; + else if (*where_expr) // SP executed first time (STMT_INITIALIZED_FOR_SP) + /* 1. copy_andor_structure() is required since this andor tree + is modified later (and on shorter arena) */ + vers_saved_where= (*where_expr)->copy_andor_structure(thd); + } + + /* We have to save also non-versioned on_expr since we may have + conjuction of versioned + non-versioned */ + if (thd->stmt_arena->is_sp_execute()) + { + for (table= tables; table; table= table->next_local) + { + if (!table->table) + continue; + + if (table->saved_on_expr) // same logic as vers_saved_where + { + if (table->on_expr) + table->on_expr= table->saved_on_expr->copy_andor_structure(thd); + else + // on_expr was moved to WHERE (see below: Add ON expression to the WHERE) + *where_expr= and_items(thd, + *where_expr, + table->saved_on_expr->copy_andor_structure(thd)); + } + else if (table->on_expr && + thd->stmt_arena->state == Query_arena::STMT_INITIALIZED_FOR_SP) + { + table->saved_on_expr= table->on_expr->copy_andor_structure(thd); + } + } + } + + SELECT_LEX *outer_slex= next_select_in_list(); + // propagate derived conditions to outer SELECT_LEX + if (outer_slex && vers_export_outer) + { + for (table= outer_slex->table_list.first; table; table= table->next_local) + { + if (table->vers_conditions) + { + if (!is_linkage_set() && !table->vers_conditions.from_inner) + { + my_error(ER_VERS_SYSTEM_TIME_CLASH, MYF(0), table->alias); + DBUG_RETURN(-1); + } + } + else + { + table->vers_conditions= vers_export_outer; + table->vers_conditions.from_inner= true; + } + } + } + + for (table= tables; table; table= table->next_local) + { + if (table->table && table->table->versioned()) + { + vers_select_conds_t &vers_conditions= table->vers_conditions; + + // propagate system_time from nearest outer SELECT_LEX + if (!vers_conditions && outer_slex && vers_import_outer) + { + TABLE_LIST* derived= master_unit()->derived; + // inner SELECT may not be a derived table (derived == NULL) + while (derived && outer_slex && (!derived->vers_conditions || derived->vers_conditions.from_inner)) + { + derived= outer_slex->master_unit()->derived; + outer_slex= outer_slex->next_select_in_list(); + } + if (derived && outer_slex && !derived->vers_conditions.from_inner) + { + DBUG_ASSERT(derived->vers_conditions); + vers_conditions= derived->vers_conditions; + } + } + + // propagate system_time from sysvar + if (!vers_conditions) + { + if (vers_conditions.init_from_sysvar(thd)) + DBUG_RETURN(-1); + } + + if (vers_conditions) + { + switch (this->lock_type) + { + case TL_WRITE_ALLOW_WRITE: + case TL_WRITE_CONCURRENT_INSERT: + case TL_WRITE_DELAYED: + case TL_WRITE_DEFAULT: + case TL_WRITE_LOW_PRIORITY: + case TL_WRITE: + case TL_WRITE_ONLY: + my_error(ER_VERS_HISTORY_LOCK, MYF(0)); + DBUG_RETURN(-1); + default: + break; + } + + if (vers_conditions == FOR_SYSTEM_TIME_ALL) + continue; + } // if (vers_conditions) + + COND** dst_cond= where_expr; + if (table->on_expr) + { + dst_cond= &table->on_expr; + } + + if (TABLE_LIST *t= table->embedding) + { + if (t->on_expr) + dst_cond= &t->on_expr; + } + + const LEX_CSTRING *fstart= &table->table->vers_start_field()->field_name; + const LEX_CSTRING *fend= &table->table->vers_end_field()->field_name; + + Item *row_start= + newx Item_field(thd, &this->context, table->db, table->alias, fstart); + Item *row_end= + newx Item_field(thd, &this->context, table->db, table->alias, fend); + Item *row_end2= row_end; + + bool tmp_from_ib= + table->table->s->table_category == TABLE_CATEGORY_TEMPORARY && + table->table->vers_start_field()->type() == MYSQL_TYPE_LONGLONG; + bool timestamps_only= table->table->versioned_by_sql() && !tmp_from_ib; + + if (vers_conditions) + { + vers_conditions.resolve_units(timestamps_only); + if (timestamps_only && (vers_conditions.unit_start == UNIT_TRX_ID || + vers_conditions.unit_end == UNIT_TRX_ID)) + { + my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->table_name); + DBUG_RETURN(-1); + } + } + + Item *cond1= 0, *cond2= 0, *curr= 0; + // Temporary tables of can be created from INNODB tables and thus will + // have uint64 type of sys_trx_(start|end) field. + // They need special handling. + TABLE *t= table->table; + if (tmp_from_ib || t->versioned_by_sql() || + thd->variables.vers_innodb_algorithm_simple) + { + switch (vers_conditions.type) + { + case FOR_SYSTEM_TIME_UNSPECIFIED: + if (t->vers_start_field()->real_type() != MYSQL_TYPE_LONGLONG) + { + MYSQL_TIME max_time; + thd->variables.time_zone->gmt_sec_to_TIME(&max_time, TIMESTAMP_MAX_VALUE); + max_time.second_part= TIME_MAX_SECOND_PART; + curr= newx Item_datetime_literal(thd, &max_time, + TIME_SECOND_PART_DIGITS); + cond1= newx Item_func_eq(thd, row_end, curr); + } + else + { + curr= newx Item_int(thd, ULONGLONG_MAX); + cond1= newx Item_func_eq(thd, row_end2, curr); + } + break; + case FOR_SYSTEM_TIME_AS_OF: + cond1= newx Item_func_le(thd, row_start, + vers_conditions.start); + cond2= newx Item_func_gt(thd, row_end, + vers_conditions.start); + break; + case FOR_SYSTEM_TIME_FROM_TO: + cond1= newx Item_func_lt(thd, row_start, + vers_conditions.end); + cond2= newx Item_func_ge(thd, row_end, + vers_conditions.start); + break; + case FOR_SYSTEM_TIME_BETWEEN: + cond1= newx Item_func_le(thd, row_start, + vers_conditions.end); + cond2= newx Item_func_ge(thd, row_end, + vers_conditions.start); + break; + case FOR_SYSTEM_TIME_BEFORE: + cond1= newx Item_func_lt(thd, row_end, + vers_conditions.start); + break; + default: + DBUG_ASSERT(0); + } + } + else + { + DBUG_ASSERT(table->table->s && table->table->s->db_plugin); + + Item *trx_id0, *trx_id1; + + switch (vers_conditions.type) + { + case FOR_SYSTEM_TIME_UNSPECIFIED: + curr= newx Item_int(thd, ULONGLONG_MAX); + cond1= newx Item_func_eq(thd, row_end2, curr); + break; + case FOR_SYSTEM_TIME_AS_OF: + trx_id0= vers_conditions.unit_start == UNIT_TIMESTAMP ? + newx Item_func_vtq_id(thd, vers_conditions.start, TR_table::FLD_TRX_ID) : + vers_conditions.start; + cond1= newx Item_func_vtq_trx_sees_eq(thd, trx_id0, row_start); + cond2= newx Item_func_vtq_trx_sees(thd, row_end, trx_id0); + break; + case FOR_SYSTEM_TIME_FROM_TO: + case FOR_SYSTEM_TIME_BETWEEN: + trx_id0= vers_conditions.unit_start == UNIT_TIMESTAMP ? + newx Item_func_vtq_id(thd, vers_conditions.start, TR_table::FLD_TRX_ID, true) : + vers_conditions.start; + trx_id1= vers_conditions.unit_end == UNIT_TIMESTAMP ? + newx Item_func_vtq_id(thd, vers_conditions.end, TR_table::FLD_TRX_ID, false) : + vers_conditions.end; + cond1= vers_conditions.type == FOR_SYSTEM_TIME_FROM_TO ? + newx Item_func_vtq_trx_sees(thd, trx_id1, row_start) : + newx Item_func_vtq_trx_sees_eq(thd, trx_id1, row_start); + cond2= newx Item_func_vtq_trx_sees_eq(thd, row_end, trx_id0); + break; + case FOR_SYSTEM_TIME_BEFORE: + trx_id0= vers_conditions.unit_start == UNIT_TIMESTAMP ? + newx Item_func_vtq_id(thd, vers_conditions.start, TR_table::FLD_TRX_ID) : + vers_conditions.start; + cond1= newx Item_func_lt(thd, row_end, trx_id0); + break; + default: + DBUG_ASSERT(0); + } + } + + if (cond1) + { + cond1= and_items(thd, + *dst_cond, + and_items(thd, + cond2, + cond1)); + + if (on_stmt_arena.arena_replaced()) + *dst_cond= cond1; + else + thd->change_item_tree(dst_cond, cond1); + + this->where= *dst_cond; + this->where->top_level_item(); + } + } // if (... table->table->versioned()) + } // for (table= tables; ...) + + DBUG_RETURN(0); +#undef newx +} + /***************************************************************************** Check fields, find best join, do the select and output fields. mysql_select assumes that all tables are already opened @@ -744,7 +1099,11 @@ JOIN::prepare(TABLE_LIST *tables_init, { remove_redundant_subquery_clauses(select_lex); } - + + /* System Versioning: handle FOR SYSTEM_TIME clause. */ + if (select_lex->vers_setup_conds(thd, tables_list, &conds) < 0) + DBUG_RETURN(-1); + /* TRUE if the SELECT list mixes elements with and without grouping, and there is no GROUP BY clause. Mixing non-aggregated fields with @@ -1028,6 +1387,11 @@ JOIN::prepare(TABLE_LIST *tables_init, if (!procedure && result && result->prepare(fields_list, unit_arg)) goto err; /* purecov: inspected */ + if (!thd->stmt_arena->is_stmt_prepare() && select_lex->versioned_tables > 0) + { + vers_check_items(); + } + unit= unit_arg; if (prepare_stage2()) goto err; @@ -3621,6 +3985,16 @@ void JOIN::exec_inner() result->send_result_set_metadata( procedure ? procedure_fields_list : *fields, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF); + + { + List_iterator<Item> it(*columns_list); + while (Item *item= it++) + { + Item_transformer transformer= &Item::vers_transformer; + it.replace(item->transform(thd, transformer, NULL)); + } + } + error= do_select(this, procedure); /* Accumulate the counts from all join iterations of all join parts. */ thd->inc_examined_row_count(join_examined_rows); @@ -16402,7 +16776,12 @@ Field *create_tmp_field_from_field(THD *thd, Field *org_field, item->result_field= new_field; else new_field->field_name= *name; - new_field->flags|= (org_field->flags & NO_DEFAULT_VALUE_FLAG); + new_field->flags|= (org_field->flags & ( + NO_DEFAULT_VALUE_FLAG | + HIDDEN_FLAG | + VERS_SYS_START_FLAG | + VERS_SYS_END_FLAG | + VERS_UPDATE_UNVERSIONED_FLAG)); if (org_field->maybe_null() || (item && item->maybe_null)) new_field->flags&= ~NOT_NULL_FLAG; // Because of outer join if (org_field->type() == MYSQL_TYPE_VAR_STRING || @@ -16997,6 +17376,8 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, List_iterator_fast<Item> li(fields); Item *item; Field **tmp_from_field=from_field; + Field *sys_trx_start= NULL; + Field *sys_trx_end= NULL; while ((item=li++)) { Item::Type type= item->type(); @@ -17123,6 +17504,31 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, continue; // Some kind of const item } DBUG_ASSERT(!new_field->field_name.str || strlen(new_field->field_name.str) == new_field->field_name.length); + + if (type == Item::FIELD_ITEM || type == Item::REF_ITEM) + { + if (item->real_item()->type() == Item::FIELD_ITEM) + { + Item_field *item_field= (Item_field *)item->real_item(); + Field *field= item_field->field; + TABLE_SHARE *s= field->table->s; + if (s->versioned) + { + if (field->flags & VERS_SYS_START_FLAG) + sys_trx_start= new_field; + else if (field->flags & VERS_SYS_END_FLAG) + sys_trx_end= new_field; + } + } + } + if (type == Item::TYPE_HOLDER) + { + Item_type_holder *ith= (Item_type_holder*)item; + if (ith->field_flags() & VERS_SYS_START_FLAG) + sys_trx_start= new_field; + else if (ith->field_flags() & VERS_SYS_END_FLAG) + sys_trx_end= new_field; + } if (type == Item::SUM_FUNC_ITEM) { Item_sum *agg_item= (Item_sum *) item; @@ -17203,6 +17609,21 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, total_uneven_bit_length= 0; } } + + if (sys_trx_start && sys_trx_end) + { + sys_trx_start->flags|= VERS_SYS_START_FLAG | HIDDEN_FLAG; + sys_trx_end->flags|= VERS_SYS_END_FLAG | HIDDEN_FLAG; + share->versioned= true; + share->field= table->field; + share->row_start_field= sys_trx_start->field_index; + share->row_end_field= sys_trx_end->field_index; + } + else + { + DBUG_ASSERT(!sys_trx_start && !sys_trx_end); + } + DBUG_ASSERT(fieldnr == (uint) (reg_field - table->field)); DBUG_ASSERT(field_count >= (uint) (reg_field - table->field)); field_count= fieldnr; @@ -25753,6 +26174,11 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, } #endif /* WITH_PARTITION_STORAGE_ENGINE */ } + if (table && table->versioned()) + { + // versioning conditions are already unwrapped to WHERE clause + str->append(" FOR SYSTEM_TIME ALL"); + } if (my_strcasecmp(table_alias_charset, cmp_name, alias)) { char t_alias_buff[MAX_ALIAS_NAME]; diff --git a/sql/sql_select.h b/sql/sql_select.h index 95e9e943c26..659bc4e7827 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1713,6 +1713,7 @@ private: bool add_having_as_table_cond(JOIN_TAB *tab); bool make_aggr_tables_info(); + void vers_check_items(); }; enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS}; diff --git a/sql/sql_show.cc b/sql/sql_show.cc index c88991f7795..dd4d9178b35 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -62,6 +62,8 @@ #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" #endif +#include "vtmd.h" +#include "transaction.h" enum enum_i_s_events_fields { @@ -568,6 +570,7 @@ static struct show_privileges_st sys_privileges[]= {"Create view", "Tables", "To create new views"}, {"Create user", "Server Admin", "To create new users"}, {"Delete", "Tables", "To delete existing rows"}, + {"Delete versioning rows", "Tables", "To delete versioning table historical rows"}, {"Drop", "Databases,Tables", "To drop databases, tables, and views"}, #ifdef HAVE_EVENT_SCHEDULER {"Event","Server Admin","To create, alter, drop and execute events"}, @@ -1291,6 +1294,15 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list) */ MDL_savepoint mdl_savepoint= thd->mdl_context.mdl_savepoint(); + TABLE_LIST archive; + bool versioned= table_list->vers_conditions; + if (versioned) + { + DBUG_ASSERT(table_list->vers_conditions == FOR_SYSTEM_TIME_AS_OF); + VTMD_table vtmd(*table_list); + if (vtmd.setup_select(thd)) + goto exit; + } if (mysqld_show_create_get_fields(thd, table_list, &field_list, &buffer)) goto exit; @@ -1333,6 +1345,13 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list) my_eof(thd); exit: + if (versioned) + { + /* If commit fails, we should be able to reset the OK status. */ + thd->get_stmt_da()->set_overwrite_status(true); + trans_commit_stmt(thd); + thd->get_stmt_da()->set_overwrite_status(false); + } close_thread_tables(thd); /* Release any metadata locks taken during SHOW CREATE. */ thd->mdl_context.rollback_to_savepoint(mdl_savepoint); @@ -1695,6 +1714,7 @@ static bool get_field_default_value(THD *thd, Field *field, String *def_value, has_default= (field->default_value || (!(field->flags & NO_DEFAULT_VALUE_FLAG) && + !field->vers_sys_field() && field->unireg_check != Field::NEXT_NUMBER)); def_value->length(0); @@ -2014,6 +2034,7 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, TABLE *table= table_list->table; TABLE_SHARE *share= table->s; sql_mode_t sql_mode= thd->variables.sql_mode; + ulong vers_hide= thd->variables.vers_hide; bool foreign_db_mode= sql_mode & (MODE_POSTGRESQL | MODE_ORACLE | MODE_MSSQL | MODE_DB2 | MODE_MAXDB | MODE_ANSI); @@ -2053,7 +2074,7 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, alias= table_list->schema_table->table_name; else { - if (lower_case_table_names == 2) + if (lower_case_table_names == 2 || table_list->vers_force_alias) alias= table->alias.c_ptr(); else { @@ -2092,6 +2113,10 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, { uint flags = field->flags; + if (vers_hide == VERS_HIDE_FULL && + (flags & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG))) + continue; + if (ptr != table->field) packet->append(STRING_WITH_LEN(",\n")); @@ -2136,7 +2161,15 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, } else { - if (flags & NOT_NULL_FLAG) + if (field->flags & VERS_SYS_START_FLAG) + { + packet->append(STRING_WITH_LEN(" GENERATED ALWAYS AS ROW START")); + } + else if (field->flags & VERS_SYS_END_FLAG) + { + packet->append(STRING_WITH_LEN(" GENERATED ALWAYS AS ROW END")); + } + else if (flags & NOT_NULL_FLAG) packet->append(STRING_WITH_LEN(" NOT NULL")); else if (field->type() == MYSQL_TYPE_TIMESTAMP) { @@ -2154,6 +2187,11 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, packet->append(def_value.ptr(), def_value.length(), system_charset_info); } + if (field->vers_update_unversioned()) + { + packet->append(STRING_WITH_LEN(" WITHOUT SYSTEM VERSIONING")); + } + if (!limited_mysql_mode && print_on_update_clause(field, &def_value, false)) { @@ -2217,6 +2255,14 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, for (uint j=0 ; j < key_info->user_defined_key_parts ; j++,key_part++) { + Field *field= key_part->field; + if (field && field->vers_sys_field()) + { + if (vers_hide == VERS_HIDE_FULL /*|| ((field->flags & HIDDEN_FLAG) && + vers_hide != VERS_HIDE_NEVER)*/) + continue; + } + if (j) packet->append(','); @@ -2245,6 +2291,17 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, hton->index_options); } + if (table->versioned() && vers_hide != VERS_HIDE_FULL) + { + const Field *fs = table->vers_start_field(); + const Field *fe = table->vers_end_field(); + packet->append(STRING_WITH_LEN(",\n PERIOD FOR SYSTEM_TIME (")); + append_identifier(thd,packet,fs->field_name.str, fs->field_name.length); + packet->append(STRING_WITH_LEN(", ")); + append_identifier(thd,packet,fe->field_name.str, fe->field_name.length); + packet->append(STRING_WITH_LEN(")")); + } + /* Get possible foreign key definitions stored in InnoDB and append them to the CREATE TABLE statement @@ -2283,6 +2340,11 @@ int show_create_table(THD *thd, TABLE_LIST *table_list, String *packet, add_table_options(thd, table, create_info_arg, table_list->schema_table != 0, 0, packet); + if (table->versioned() && vers_hide != VERS_HIDE_FULL) + { + packet->append(STRING_WITH_LEN(" WITH SYSTEM VERSIONING")); + } + #ifdef WITH_PARTITION_STORAGE_ENGINE { if (table->part_info && @@ -4194,7 +4256,7 @@ int schema_tables_add(THD *thd, Dynamic_array<LEX_CSTRING*> *files, @retval 2 Not fatal error; Safe to ignore this file list */ -static int +int make_table_name_list(THD *thd, Dynamic_array<LEX_CSTRING*> *table_names, LEX *lex, LOOKUP_FIELD_VALUES *lookup_field_vals, LEX_CSTRING *db_name) @@ -4846,6 +4908,62 @@ public: } }; +static bool get_all_archive_tables(THD *thd, + Dynamic_array<String> &all_archive_tables) +{ + if (thd->variables.vers_hide == VERS_HIDE_NEVER) + return false; + + if (thd->variables.vers_alter_history != VERS_ALTER_HISTORY_SURVIVE) + return false; + + Dynamic_array<LEX_CSTRING *> all_db; + LOOKUP_FIELD_VALUES lookup_field_values= { + {C_STRING_WITH_LEN("%")}, {NULL, 0}, true, false}; + if (make_db_list(thd, &all_db, &lookup_field_values)) + return true; + + LEX_STRING information_schema= {C_STRING_WITH_LEN("information_schema")}; + for (size_t i= 0; i < all_db.elements(); i++) + { + LEX_CSTRING db= *all_db.at(i); + if (db.length == information_schema.length && + !memcmp(db.str, information_schema.str, db.length)) + { + all_db.del(i); + break; + } + } + + for (size_t i= 0; i < all_db.elements(); i++) + { + LEX_CSTRING db_name= *all_db.at(i); + Dynamic_array<String> archive_tables; + if (VTMD_table::get_archive_tables(thd, db_name.str, db_name.length, + archive_tables)) + return true; + for (size_t i= 0; i < archive_tables.elements(); i++) + if (all_archive_tables.push(archive_tables.at(i))) + return true; + } + + return false; +} + +static bool is_archive_table(const Dynamic_array<String> &all_archive_tables, + const LEX_CSTRING candidate) +{ + for (size_t i= 0; i < all_archive_tables.elements(); i++) + { + const String &archive_table= all_archive_tables.at(i); + if (candidate.length == archive_table.length() && + !memcmp(candidate.str, archive_table.ptr(), candidate.length)) + { + return true; + } + } + return false; +} /** @brief Fill I_S tables whose data are retrieved @@ -4888,6 +5006,7 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) #endif uint table_open_method= tables->table_open_method; bool can_deadlock; + Dynamic_array<String> all_archive_tables; MEM_ROOT tmp_mem_root; DBUG_ENTER("get_all_tables"); @@ -4954,6 +5073,9 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) if (make_db_list(thd, &db_names, &plan->lookup_field_vals)) goto err; + if (get_all_archive_tables(thd, all_archive_tables)) + goto err; + /* Use tmp_mem_root to allocate data for opened tables */ init_alloc_root(&tmp_mem_root, SHOW_ALLOC_BLOCK_SIZE, SHOW_ALLOC_BLOCK_SIZE, MY_THREAD_SPECIFIC); @@ -4983,6 +5105,9 @@ int get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond) LEX_CSTRING *table_name= table_names.at(i); DBUG_ASSERT(table_name->length <= NAME_LEN); + if (is_archive_table(all_archive_tables, *table_name)) + continue; + #ifndef NO_EMBEDDED_ACCESS_CHECKS if (!(thd->col_access & TABLE_ACLS)) { @@ -7047,6 +7172,10 @@ static int get_schema_partitions_record(THD *thd, TABLE_LIST *tables, tmp_res.append(STRING_WITH_LEN("HASH")); table->field[7]->store(tmp_res.ptr(), tmp_res.length(), cs); break; + case VERSIONING_PARTITION: + tmp_res.length(0); + tmp_res.append(STRING_WITH_LEN("SYSTEM_TIME")); + break; default: DBUG_ASSERT(0); my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR)); @@ -7752,7 +7881,8 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list) if (!(item=new (mem_root) Item_return_date_time(thd, fields_info->field_name, field_name_length, - fields_info->field_type))) + fields_info->field_type, + fields_info->field_length))) DBUG_RETURN(0); item->decimals= fields_info->field_length; break; diff --git a/sql/sql_show.h b/sql/sql_show.h index dc2fe9738fe..ac13099ca48 100644 --- a/sql/sql_show.h +++ b/sql/sql_show.h @@ -199,6 +199,9 @@ typedef struct st_lookup_field_values bool wild_table_value; } LOOKUP_FIELD_VALUES; +int make_table_name_list(THD *thd, Dynamic_array<LEX_CSTRING *> *table_names, + LEX *lex, LOOKUP_FIELD_VALUES *lookup_field_vals, + LEX_CSTRING *db_name); /* INFORMATION_SCHEMA: Execution plan for get_all_tables() call diff --git a/sql/sql_table.cc b/sql/sql_table.cc index dbf198d722a..ba5eb36173b 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -55,6 +55,9 @@ #include "transaction.h" #include "sql_audit.h" #include "sql_sequence.h" +#include "tztime.h" +#include "vtmd.h" // System Versioning + #ifdef __WIN__ #include <io.h> @@ -2305,6 +2308,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, const char *db= table->db; size_t db_length= table->db_length; handlerton *table_type= 0; + VTMD_drop vtmd(*table); DBUG_PRINT("table", ("table_l: '%s'.'%s' table: %p s: %p", table->db, table->table_name, table->table, @@ -2465,6 +2469,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, else { char *end; + int frm_delete_error= 0; /* It could happen that table's share in the table definition cache is the only thing that keeps the engine plugin loaded @@ -2503,30 +2508,51 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, // Remove extension for delete *(end= path + path_length - reg_ext_length)= '\0'; - error= ha_delete_table(thd, table_type, path, db, table->table_name, - !dont_log_query); - - if (!error) + if ((thd->lex->sql_command == SQLCOM_DROP_TABLE || + thd->lex->sql_command == SQLCOM_CREATE_TABLE) && + thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE && + table_type && table_type != view_pseudo_hton) + { + error= vtmd.check_exists(thd); + if (error) + goto non_tmp_err; + if (!vtmd.exists) + goto drop_table; + error= mysql_rename_table(table_type, table->db, table->table_name, + table->db, vtmd.archive_name(thd), NO_FK_CHECKS); + } + else { - int frm_delete_error, trigger_drop_error= 0; - /* Delete the table definition file */ - strmov(end,reg_ext); - if (table_type && table_type != view_pseudo_hton && - table_type->discover_table) + drop_table: + error= ha_delete_table(thd, table_type, path, db, table->table_name, + !dont_log_query); + if (!error) { - /* - Table type is using discovery and may not need a .frm file. - Delete it silently if it exists - */ - (void) mysql_file_delete(key_file_frm, path, MYF(0)); - frm_delete_error= 0; + /* Delete the table definition file */ + strmov(end,reg_ext); + if (table_type && table_type != view_pseudo_hton && + table_type->discover_table) + { + /* + Table type is using discovery and may not need a .frm file. + Delete it silently if it exists + */ + (void) mysql_file_delete(key_file_frm, path, MYF(0)); + } + else if (mysql_file_delete(key_file_frm, path, + MYF(MY_WME))) + { + frm_delete_error= my_errno; + DBUG_ASSERT(frm_delete_error); + } } - else - frm_delete_error= mysql_file_delete(key_file_frm, path, - MYF(MY_WME)); - if (frm_delete_error) - frm_delete_error= my_errno; - else + } + + if (!error) + { + int trigger_drop_error= 0; + + if (!frm_delete_error) { non_tmp_table_deleted= TRUE; trigger_drop_error= @@ -2539,8 +2565,21 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, else if (frm_delete_error && if_exists) thd->clear_error(); } + non_tmp_err: non_tmp_error|= MY_TEST(error); } + + if (!error && vtmd.exists) + { + enum_sql_command sql_command= thd->lex->sql_command; + thd->lex->sql_command= SQLCOM_DROP_TABLE; + error= vtmd.update(thd); + thd->lex->sql_command= sql_command; + if (error) + mysql_rename_table(table_type, table->db, vtmd.archive_name(), + table->db, table->table_name, NO_FK_CHECKS); + } + if (error) { if (wrong_tables.length()) @@ -3034,10 +3073,12 @@ void promote_first_timestamp_column(List<Create_field> *column_definitions) if (column_definition->is_timestamp_type() || // TIMESTAMP column_definition->unireg_check == Field::TIMESTAMP_OLD_FIELD) // Legacy { + DBUG_PRINT("info", ("field-ptr:%p", column_definition->field)); if ((column_definition->flags & NOT_NULL_FLAG) != 0 && // NOT NULL, column_definition->default_value == NULL && // no constant default, column_definition->unireg_check == Field::NONE && // no function default - column_definition->vcol_info == NULL) + column_definition->vcol_info == NULL && + !(column_definition->flags & (VERS_SYS_START_FLAG | VERS_SYS_END_FLAG))) // column isn't generated { DBUG_PRINT("info", ("First TIMESTAMP column '%s' was promoted to " "DEFAULT CURRENT_TIMESTAMP ON UPDATE " @@ -3348,6 +3389,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, } select_field_pos= alter_info->create_list.elements - select_field_count; + for (field_no=0; (sql_field=it++) ; field_no++) { /* @@ -4293,6 +4335,51 @@ bool Column_definition::sp_prepare_create_field(THD *thd, MEM_ROOT *mem_root) } +static bool +vers_prepare_keys(THD *thd, + HA_CREATE_INFO *create_info, + Alter_info *alter_info, + KEY **key_info, + uint key_count) +{ + DBUG_ASSERT(create_info->versioned()); + + const char *row_start_field= create_info->vers_info.as_row.start; + DBUG_ASSERT(row_start_field); + const char *row_end_field= create_info->vers_info.as_row.end; + DBUG_ASSERT(row_end_field); + + List_iterator<Key> key_it(alter_info->key_list); + Key *key= NULL; + while ((key=key_it++)) + { + if (key->type != Key::PRIMARY && key->type != Key::UNIQUE) + continue; + + Key_part_spec *key_part= NULL; + List_iterator<Key_part_spec> part_it(key->columns); + while ((key_part=part_it++)) + { + if (!my_strcasecmp(system_charset_info, + row_start_field, + key_part->field_name.str) || + + !my_strcasecmp(system_charset_info, + row_end_field, + key_part->field_name.str)) + break; + } + if (key_part) + continue; // Key already contains Sys_start or Sys_end + + Key_part_spec *key_part_sys_end_col= + new (thd->mem_root) Key_part_spec(&create_info->vers_info.as_row.end, 0); + key->columns.push_back(key_part_sys_end_col); + } + + return false; +} + handler *mysql_create_frm_image(THD *thd, const char *db, const char *table_name, HA_CREATE_INFO *create_info, @@ -4451,7 +4538,10 @@ handler *mysql_create_frm_image(THD *thd, part_info->part_info_string= part_syntax_buf; part_info->part_info_len= syntax_len; if ((!(engine_type->partition_flags && - engine_type->partition_flags() & HA_CAN_PARTITION)) || + ((engine_type->partition_flags() & HA_CAN_PARTITION) || + (part_info->part_type == VERSIONING_PARTITION && + engine_type->partition_flags() & HA_ONLY_VERS_PARTITION)) + )) || create_info->db_type == partition_hton) { /* @@ -4532,6 +4622,13 @@ handler *mysql_create_frm_image(THD *thd, } #endif + if (create_info->versioned()) + { + if(vers_prepare_keys(thd, create_info, alter_info, key_info, + *key_count)) + goto err; + } + if (mysql_prepare_create_table(thd, create_info, alter_info, &db_options, file, key_info, key_count, create_table_mode)) @@ -4989,6 +5086,7 @@ bool mysql_create_table(THD *thd, TABLE_LIST *create_table, { thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); result= 1; + goto err; } else { @@ -4997,6 +5095,20 @@ bool mysql_create_table(THD *thd, TABLE_LIST *create_table, } } + if (create_info->versioned() && + thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + VTMD_table vtmd(*create_table); + if (vtmd.update(thd)) + { + thd->variables.vers_alter_history = VERS_ALTER_HISTORY_KEEP; + mysql_rm_table_no_locks(thd, create_table, 0, 0, 0, 0, 1, 1); + thd->variables.vers_alter_history = VERS_ALTER_HISTORY_SURVIVE; + result= 1; + goto err; + } + } + err: /* In RBR we don't need to log CREATE TEMPORARY TABLE */ if (thd->is_current_stmt_binlog_format_row() && create_info->tmp_table()) @@ -5107,6 +5219,66 @@ static void make_unique_constraint_name(THD *thd, LEX_CSTRING *name, ** Alter a table definition ****************************************************************************/ +bool operator!=(const MYSQL_TIME &lhs, const MYSQL_TIME &rhs) +{ + return lhs.year != rhs.year || lhs.month != rhs.month || lhs.day != rhs.day || + lhs.hour != rhs.hour || lhs.minute != rhs.minute || + lhs.second_part != rhs.second_part || lhs.neg != rhs.neg || + lhs.time_type != rhs.time_type; +} + +// Sets sys_trx_end=MAX for rows with sys_trx_end=now(6) +static bool vers_reset_alter_copy(THD *thd, TABLE *table) +{ + const MYSQL_TIME query_start= thd->query_start_TIME(); + + READ_RECORD info; + int error= 0; + bool will_batch= false; + ha_rows dup_key_found= 0; + if (init_read_record(&info, thd, table, NULL, NULL, 0, 1, true)) + goto err; + + will_batch= !table->file->start_bulk_update(); + + while (!(error= info.read_record())) + { + MYSQL_TIME current; + if (table->vers_end_field()->get_date(¤t, 0)) + goto err_read_record; + if (current != query_start) + { + continue; + } + + store_record(table, record[1]); + table->vers_end_field()->set_max(); + if (will_batch) + error= table->file->ha_bulk_update_row(table->record[1], table->record[0], + &dup_key_found); + else + error= table->file->ha_update_row(table->record[1], table->record[0]); + if (error && table->file->is_fatal_error(error, HA_CHECK_ALL)) + { + table->file->print_error(error, MYF(ME_FATALERROR)); + goto err_read_record; + } + } + + if (will_batch && (error= table->file->exec_bulk_update(&dup_key_found))) + table->file->print_error(error, MYF(ME_FATALERROR)); + if (will_batch) + table->file->end_bulk_update(); + +err_read_record: + end_read_record(&info); + +err: + if (table->file->ha_external_lock(thd, F_UNLCK)) + return true; + + return error ? true : false; +} /** Rename a table. @@ -5341,7 +5513,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, local_create_info.max_rows= 0; /* Replace type of source table with one specified in the statement. */ local_create_info.options&= ~HA_LEX_CREATE_TMP_TABLE; - local_create_info.options|= create_info->tmp_table(); + local_create_info.options|= create_info->options; /* Reset auto-increment counter for the new table. */ local_create_info.auto_increment_value= 0; /* @@ -5350,6 +5522,13 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, */ local_create_info.data_file_name= local_create_info.index_file_name= NULL; + if (src_table->table->versioned() && + local_create_info.vers_info.fix_create_like(local_alter_info, local_create_info, + *src_table, *table)) + { + goto err; + } + /* The following is needed only in case of lock tables */ if ((local_create_info.table= thd->lex->query_tables->table)) pos_in_locked_tables= local_create_info.table->pos_in_locked_tables; @@ -6226,6 +6405,8 @@ static bool fill_alter_inplace_info(THD *thd, ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_ADD_CHECK_CONSTRAINT; if (alter_info->flags & Alter_info::ALTER_DROP_CHECK_CONSTRAINT) ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_DROP_CHECK_CONSTRAINT; + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_DROP) + ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_DROP_HISTORICAL; /* If we altering table with old VARCHAR fields we will be automatically @@ -7258,11 +7439,34 @@ static bool mysql_inplace_alter_table(THD *thd, DEBUG_SYNC(thd, "alter_table_inplace_before_commit"); THD_STAGE_INFO(thd, stage_alter_inplace_commit); - if (table->file->ha_commit_inplace_alter_table(altered_table, - ha_alter_info, - true)) { - goto rollback; + TR_table trt(thd, true); + if (trt != *table_list && table->file->ht->prepare_commit_versioned) + { + ulonglong trx_start_id= 0; + ulonglong trx_end_id= table->file->ht->prepare_commit_versioned(thd, &trx_start_id); + if (trx_end_id) + { + if (!use_transaction_registry) + { + my_error(ER_VERS_TRT_IS_DISABLED, MYF(0)); + goto rollback; + } + if (trt.update(trx_start_id, trx_end_id)) + { + goto rollback; + } + } + } + + if (table->file->ha_commit_inplace_alter_table(altered_table, + ha_alter_info, + true)) + { + goto rollback; + } + + thd->drop_temporary_table(altered_table, NULL, false); } close_all_tables_for_name(thd, table->s, @@ -7272,8 +7476,6 @@ static bool mysql_inplace_alter_table(THD *thd, NULL); table_list->table= table= NULL; - thd->drop_temporary_table(altered_table, NULL, false); - /* Replace the old .FRM with the new .FRM, but keep the old name for now. Rename to the new name (if needed) will be handled separately below. @@ -8491,18 +8693,30 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list, if (mysql_rename_table(old_db_type, alter_ctx->db, alter_ctx->table_name, alter_ctx->new_db, alter_ctx->new_alias, 0)) error= -1; - else if (Table_triggers_list::change_table_name(thd, - alter_ctx->db, - alter_ctx->alias, - alter_ctx->table_name, - alter_ctx->new_db, - alter_ctx->new_alias)) - { - (void) mysql_rename_table(old_db_type, - alter_ctx->new_db, alter_ctx->new_alias, - alter_ctx->db, alter_ctx->table_name, - NO_FK_CHECKS); - error= -1; + else + { + VTMD_rename vtmd(*table_list); + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE && + vtmd.try_rename(thd, new_db_name, new_table_name)) + { + goto revert_table_name; + } + else if (Table_triggers_list::change_table_name(thd, + alter_ctx->db, + alter_ctx->alias, + alter_ctx->table_name, + alter_ctx->new_db, + alter_ctx->new_alias)) + { + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + vtmd.revert_rename(thd, new_db_name); +revert_table_name: + (void) mysql_rename_table(old_db_type, + alter_ctx->new_db, alter_ctx->new_alias, + alter_ctx->db, alter_ctx->table_name, + NO_FK_CHECKS); + error= -1; + } } } @@ -8634,6 +8848,56 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, &alter_prelocking_strategy); thd->open_options&= ~HA_OPEN_FOR_ALTER; + TABLE *table= table_list->table; + bool versioned= table && table->versioned(); + bool vers_survival_mod= false; + + if (versioned) + { + if (handlerton *hton1= create_info->db_type) + { + handlerton *hton2= table->file->ht; + if (hton1 != hton2 && + (ha_check_storage_engine_flag(hton1, HTON_NATIVE_SYS_VERSIONING) || + ha_check_storage_engine_flag(hton2, HTON_NATIVE_SYS_VERSIONING))) + { + my_error(ER_VERS_ALTER_ENGINE_PROHIBITED, MYF(0), table_list->db, + table_list->table_name); + DBUG_RETURN(true); + } + } + bool vers_data_mod= alter_info->data_modifying(); + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + vers_survival_mod= alter_info->data_modifying() || alter_info->partition_modifying(); + } + else if (vers_data_mod && thd->variables.vers_alter_history == VERS_ALTER_HISTORY_ERROR) + { + my_error(ER_VERS_ALTER_NOT_ALLOWED, MYF(0), table_list->db, table_list->table_name); + DBUG_RETURN(true); + } + } + + if (vers_survival_mod) + { + table_list->set_lock_type(thd, TL_WRITE); + if (thd->mdl_context.upgrade_shared_lock(table_list->table->mdl_ticket, + MDL_EXCLUSIVE, + thd->variables.lock_wait_timeout)) + { + DBUG_RETURN(true); + } + + if (table_list->table->versioned_by_engine() && + alter_info->requested_algorithm == + Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT && + !table_list->table->s->partition_info_str) + { + // Changle default ALGORITHM to COPY for INNODB + alter_info->requested_algorithm= Alter_info::ALTER_TABLE_ALGORITHM_COPY; + } + } + DEBUG_SYNC(thd, "alter_opened_table"); #ifdef WITH_WSREP @@ -8650,7 +8914,6 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, if (error) DBUG_RETURN(true); - TABLE *table= table_list->table; table->use_all_columns(); MDL_ticket *mdl_ticket= table->mdl_ticket; @@ -8761,6 +9024,12 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, if (check_engine(thd, alter_ctx.new_db, alter_ctx.new_name, create_info)) DBUG_RETURN(true); + if (create_info->vers_info.check_and_fix_alter(thd, alter_info, create_info, + table)) + { + DBUG_RETURN(true); + } + if ((create_info->db_type != table->s->db_type() || alter_info->flags & Alter_info::ALTER_PARTITION) && !table->file->can_switch_engines()) @@ -8939,9 +9208,11 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, Upgrade from MDL_SHARED_UPGRADABLE to MDL_SHARED_NO_WRITE. Afterwards it's safe to take the table level lock. */ - if (thd->mdl_context.upgrade_shared_lock(mdl_ticket, MDL_SHARED_NO_WRITE, - thd->variables.lock_wait_timeout) - || lock_tables(thd, table_list, alter_ctx.tables_opened, 0)) + if ((!vers_survival_mod && + thd->mdl_context.upgrade_shared_lock( + mdl_ticket, MDL_SHARED_NO_WRITE, + thd->variables.lock_wait_timeout)) || + lock_tables(thd, table_list, alter_ctx.tables_opened, 0)) { DBUG_RETURN(true); } @@ -9003,6 +9274,7 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, handlerton *new_db_type= create_info->db_type; handlerton *old_db_type= table->s->db_type(); TABLE *new_table= NULL; + bool new_versioned= false; ha_rows copied=0,deleted=0; /* @@ -9339,6 +9611,7 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, } if (!new_table) goto err_new_table_cleanup; + new_versioned= new_table->versioned(); /* Note: In case of MERGE table, we do not attach children. We do not copy data for MERGE tables. Only the children have data. @@ -9365,7 +9638,14 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, order_num, order, &copied, &deleted, alter_info->keys_onoff, &alter_ctx)) + { + if (vers_survival_mod && new_versioned && table->versioned_by_sql()) + { + // Failure of this function may result in corruption of an original table. + vers_reset_alter_copy(thd, table); + } goto err_new_table_cleanup; + } } else { @@ -9460,9 +9740,14 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, Rename the old table to temporary name to have a backup in case anything goes wrong while renaming the new table. */ - char backup_name[32]; - my_snprintf(backup_name, sizeof(backup_name), "%s2-%lx-%lx", tmp_file_prefix, - current_pid, (long) thd->thread_id); + char backup_name[FN_LEN]; + if (vers_survival_mod) + VTMD_table::archive_name(thd, alter_ctx.table_name, backup_name, + sizeof(backup_name)); + else + my_snprintf(backup_name, sizeof(backup_name), "%s2-%lx-%lx", + tmp_file_prefix, current_pid, thd->thread_id); + if (lower_case_table_names) my_casedn_str(files_charset_info, backup_name); if (mysql_rename_table(old_db_type, alter_ctx.db, alter_ctx.table_name, @@ -9490,6 +9775,17 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, goto err_with_mdl; } + if (vers_survival_mod && new_versioned) + { + DBUG_ASSERT(alter_info && table_list); + VTMD_rename vtmd(*table_list); + bool rc= alter_info->flags & Alter_info::ALTER_RENAME ? + vtmd.try_rename(thd, alter_ctx.new_db, alter_ctx.new_alias, backup_name) : + vtmd.update(thd, backup_name); + if (rc) + goto err_after_rename; + } + // Check if we renamed the table and if so update trigger files. if (alter_ctx.is_table_renamed()) { @@ -9500,6 +9796,7 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, alter_ctx.new_db, alter_ctx.new_alias)) { +err_after_rename: // Rename succeeded, delete the new table. (void) quick_rm_table(thd, new_db_type, alter_ctx.new_db, alter_ctx.new_alias, 0); @@ -9514,7 +9811,8 @@ bool mysql_alter_table(THD *thd, const char *new_db, const char *new_name, } // ALTER TABLE succeeded, delete the backup of the old table. - if (quick_rm_table(thd, old_db_type, alter_ctx.db, backup_name, FN_IS_TMP)) + if (!(vers_survival_mod && new_versioned) && + quick_rm_table(thd, old_db_type, alter_ctx.db, backup_name, FN_IS_TMP)) { /* The fact that deletion of the backup failed is not critical @@ -9701,6 +9999,11 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, sql_mode_t save_sql_mode= thd->variables.sql_mode; ulonglong prev_insert_id, time_to_report_progress; Field **dfield_ptr= to->default_field; + bool make_versioned= !from->versioned() && to->versioned(); + bool make_unversioned= from->versioned() && !to->versioned(); + bool keep_versioned= from->versioned() && to->versioned(); + Field *to_sys_trx_start= NULL, *to_sys_trx_end= NULL, *from_sys_trx_end= NULL; + MYSQL_TIME query_start; DBUG_ENTER("copy_data_between_tables"); /* Two or 3 stages; Sorting, copying data and update indexes */ @@ -9801,6 +10104,29 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, thd_progress_next_stage(thd); } + if (make_versioned) + { + query_start= thd->query_start_TIME(); + to_sys_trx_start= to->vers_start_field(); + to_sys_trx_end= to->vers_end_field(); + } + else if (make_unversioned) + { + from_sys_trx_end= from->vers_end_field(); + } + else if (keep_versioned) + { + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + query_start= thd->query_start_TIME(); + from_sys_trx_end= from->vers_end_field(); + to_sys_trx_start= to->vers_start_field(); + } else if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_DROP) + { + from_sys_trx_end= from->vers_end_field(); + } + } + THD_STAGE_INFO(thd, stage_copy_to_tmp_table); /* Tell handler that we have values for all columns in the to table */ to->use_all_columns(); @@ -9854,6 +10180,36 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, { copy_ptr->do_copy(copy_ptr); } + + if (thd->variables.vers_alter_history == VERS_ALTER_HISTORY_DROP && + from_sys_trx_end && !from_sys_trx_end->is_max()) + { + continue; + } + + if (make_versioned) + { + to_sys_trx_start->set_notnull(); + to_sys_trx_start->store_time(&query_start); + to_sys_trx_end->set_max(); + } + else if (make_unversioned) + { + if (!from_sys_trx_end->is_max()) + continue; // Drop history rows. + } + else if (keep_versioned && + thd->variables.vers_alter_history == VERS_ALTER_HISTORY_SURVIVE) + { + if (!from_sys_trx_end->is_max()) + continue; // Do not copy history rows. + + store_record(from, record[1]); + from->vers_end_field()->store_time(&query_start); + from->file->ha_update_row(from->record[1], from->record[0]); + to_sys_trx_start->store_time(&query_start); + } + prev_insert_id= to->file->next_insert_id; if (to->default_field) to->update_default_fields(0, ignore); @@ -9868,7 +10224,12 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, error= 1; break; } - error=to->file->ha_write_row(to->record[0]); + if (keep_versioned && to->versioned_by_engine() && + thd->variables.vers_alter_history != VERS_ALTER_HISTORY_SURVIVE) + { + to->vers_write= false; + } + error= to->file->ha_write_row(to->record[0]); to->auto_increment_field_not_null= FALSE; if (error) { diff --git a/sql/sql_tablespace.cc b/sql/sql_tablespace.cc index 93a3007d1ea..b07ec0b418c 100644 --- a/sql/sql_tablespace.cc +++ b/sql/sql_tablespace.cc @@ -22,6 +22,70 @@ #include "sql_table.h" // write_bin_log #include "sql_class.h" // THD +/** + Check if tablespace name is valid + + @param tablespace_name Name of the tablespace + + @note Tablespace names are not reflected in the file system, so + character case conversion or consideration is not relevant. + + @note Checking for path characters or ending space is not done. + The only checks are for identifier length, both in terms of + number of characters and number of bytes. + + @retval IDENT_NAME_OK Identifier name is ok (Success) + @retval IDENT_NAME_WRONG Identifier name is wrong, if length == 0 +* (ER_WRONG_TABLESPACE_NAME) + @retval IDENT_NAME_TOO_LONG Identifier name is too long if it is greater + than 64 characters (ER_TOO_LONG_IDENT) + + @note In case of IDENT_NAME_TOO_LONG or IDENT_NAME_WRONG, the function + reports an error (using my_error()). +*/ + +enum_ident_name_check check_tablespace_name(const char *tablespace_name) +{ + size_t name_length= 0; //< Length as number of bytes + size_t name_length_symbols= 0; //< Length as number of symbols + + // Name must be != NULL and length must be > 0 + if (!tablespace_name || (name_length= strlen(tablespace_name)) == 0) + { + my_error(ER_WRONG_TABLESPACE_NAME, MYF(0), tablespace_name); + return IDENT_NAME_WRONG; + } + + // If we do not have too many bytes, we must check the number of symbols, + // provided the system character set may use more than one byte per symbol. + if (name_length <= NAME_LEN && use_mb(system_charset_info)) + { + const char *name= tablespace_name; //< The actual tablespace name + const char *end= name + name_length; //< Pointer to first byte after name + + // Loop over all symbols as long as we don't have too many already + while (name != end && name_length_symbols <= NAME_CHAR_LEN) + { + int len= my_ismbchar(system_charset_info, name, end); + if (len) + name += len; + else + name++; + + name_length_symbols++; + } + } + + if (name_length_symbols > NAME_CHAR_LEN || name_length > NAME_LEN) + { + my_error(ER_TOO_LONG_IDENT, MYF(0), tablespace_name); + return IDENT_NAME_TOO_LONG; + } + + return IDENT_NAME_OK; +} + + int mysql_alter_tablespace(THD *thd, st_alter_tablespace *ts_info) { int error= HA_ADMIN_NOT_IMPLEMENTED; diff --git a/sql/sql_tablespace.h b/sql/sql_tablespace.h index ae77d15cbcb..b97c64f7965 100644 --- a/sql/sql_tablespace.h +++ b/sql/sql_tablespace.h @@ -19,6 +19,41 @@ class THD; class st_alter_tablespace; +/** + Enumerate possible status of a identifier name while determining + its validity +*/ +enum enum_ident_name_check +{ + IDENT_NAME_OK, + IDENT_NAME_WRONG, + IDENT_NAME_TOO_LONG +}; + +/** + Check if tablespace name is valid + + @param tablespace_name Name of the tablespace + + @note Tablespace names are not reflected in the file system, so + character case conversion or consideration is not relevant. + + @note Checking for path characters or ending space is not done. + The only checks are for identifier length, both in terms of + number of characters and number of bytes. + + @retval IDENT_NAME_OK Identifier name is ok (Success) + @retval IDENT_NAME_WRONG Identifier name is wrong, if length == 0 + (ER_WRONG_TABLESPACE_NAME) + @retval IDENT_NAME_TOO_LONG Identifier name is too long if it is greater + than 64 characters (ER_TOO_LONG_IDENT) + + @note In case of IDENT_NAME_TOO_LONG or IDENT_NAME_WRONG, the function + reports an error (using my_error()). +*/ + +enum_ident_name_check check_tablespace_name(const char *tablespace_name); + int mysql_alter_tablespace(THD* thd, st_alter_tablespace *ts_info); #endif /* SQL_TABLESPACE_INCLUDED */ diff --git a/sql/sql_time.cc b/sql/sql_time.cc index 309ede45ecc..276540e9dba 100644 --- a/sql/sql_time.cc +++ b/sql/sql_time.cc @@ -475,6 +475,7 @@ void localtime_to_TIME(MYSQL_TIME *to, struct tm *from) to->second= (int) from->tm_sec; } + void calc_time_from_sec(MYSQL_TIME *to, long seconds, long microseconds) { long t_seconds; diff --git a/sql/sql_time.h b/sql/sql_time.h index 1832e4501ed..28a2e2f50d2 100644 --- a/sql/sql_time.h +++ b/sql/sql_time.h @@ -170,6 +170,7 @@ bool calc_time_diff(const MYSQL_TIME *l_time1, const MYSQL_TIME *l_time2, int lsign, MYSQL_TIME *l_time3, ulonglong fuzzydate); int my_time_compare(const MYSQL_TIME *a, const MYSQL_TIME *b); void localtime_to_TIME(MYSQL_TIME *to, struct tm *from); + void calc_time_from_sec(MYSQL_TIME *to, long seconds, long microseconds); uint calc_week(MYSQL_TIME *l_time, uint week_behaviour, uint *year); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index d6c722246e7..44aad3ad9f6 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -1249,9 +1249,29 @@ bool Table_triggers_list::prepare_record_accessors(TABLE *table) *trg_fld= 0; DBUG_ASSERT(null_ptr <= extra_null_bitmap + null_bytes); bzero(extra_null_bitmap, null_bytes); + + if (table->versioned()) + { + vers_user_field= (Field **)alloc_root(&table->mem_root, + (table->s->fields - VERSIONING_FIELDS + 1) * + sizeof(Field*)); + if (!vers_user_field) + return 1; + Field **dst= vers_user_field; + for (Field **src= record0_field; *src; src++) + { + if ((*src)->vers_sys_field()) + continue; + *dst++= *src; + } + *dst= NULL; + } } else + { record0_field= table->field; + vers_user_field= table->vers_user_field; + } if (has_triggers(TRG_EVENT_UPDATE,TRG_ACTION_BEFORE) || has_triggers(TRG_EVENT_UPDATE,TRG_ACTION_AFTER) || diff --git a/sql/sql_trigger.h b/sql/sql_trigger.h index 8847680c7b2..43cbec7e433 100644 --- a/sql/sql_trigger.h +++ b/sql/sql_trigger.h @@ -146,6 +146,10 @@ class Table_triggers_list: public Sql_alloc Field **record0_field; uchar *extra_null_bitmap; /** + System Versioning: record0_field without system fields. + */ + Field **vers_user_field; + /** Copy of TABLE::Field array with field pointers set to TABLE::record[1] buffer instead of TABLE::record[0] (used for OLD values in on UPDATE trigger and DELETE trigger when it is called for REPLACE). @@ -208,7 +212,7 @@ public: /* End of character ser context. */ Table_triggers_list(TABLE *table_arg) - :record0_field(0), extra_null_bitmap(0), record1_field(0), + :record0_field(0), extra_null_bitmap(0), vers_user_field(0), record1_field(0), trigger_table(table_arg), m_has_unparseable_trigger(false), count(0) { @@ -273,6 +277,7 @@ public: TABLE_LIST *table_list); Field **nullable_fields() { return record0_field; } + Field **vers_user_fields() { return vers_user_field; } void reset_extra_null_bitmap() { size_t null_bytes= (trigger_table->s->stored_fields - @@ -307,13 +312,6 @@ private: } }; -inline Field **TABLE::field_to_fill() -{ - return triggers && triggers->nullable_fields() ? triggers->nullable_fields() - : field; -} - - bool add_table_for_trigger(THD *thd, const sp_name *trg_name, bool continue_if_not_exist, diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index 1d6edbc5fc9..27e405cd6b9 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -27,7 +27,8 @@ #include "sql_truncate.h" #include "wsrep_mysqld.h" #include "sql_show.h" //append_identifier() - +#include "sql_select.h" +#include "sql_delete.h" /** Append a list of field names to a string. @@ -481,7 +482,6 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) DBUG_RETURN(error); } - /** Execute a TRUNCATE statement at runtime. @@ -493,13 +493,20 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) bool Sql_cmd_truncate_table::execute(THD *thd) { bool res= TRUE; - TABLE_LIST *first_table= thd->lex->select_lex.table_list.first; + TABLE_LIST *table= thd->lex->select_lex.table_list.first; DBUG_ENTER("Sql_cmd_truncate_table::execute"); - if (check_one_table_access(thd, DROP_ACL, first_table)) + if (table->vers_conditions) + { + if (check_one_table_access(thd, DELETE_VERSIONING_ROWS_ACL, table)) + DBUG_RETURN(res); + DBUG_RETURN(mysql_delete(thd, table, NULL, NULL, -1, 0, NULL)); + } + + if (check_one_table_access(thd, DROP_ACL, table)) DBUG_RETURN(res); - if (! (res= truncate_table(thd, first_table))) + if (! (res= truncate_table(thd, table))) my_ok(thd); DBUG_RETURN(res); diff --git a/sql/sql_tvc.cc b/sql/sql_tvc.cc index 79e894f5a7f..56a6ae58a77 100644 --- a/sql/sql_tvc.cc +++ b/sql/sql_tvc.cc @@ -241,7 +241,7 @@ bool table_value_constr::prepare(THD *thd, SELECT_LEX *sl, /* Error's in 'new' will be detected after loop */ Item_type_holder *new_holder= new (thd->mem_root) Item_type_holder(thd, - &item->name, + item, holders[pos].type_handler(), &holders[pos]/*Type_all_attributes*/, holders[pos].get_maybe_null()); diff --git a/sql/sql_type.cc b/sql/sql_type.cc index e84bdb1455d..b4860176c0c 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -32,6 +32,7 @@ Type_handler_long type_handler_long; Type_handler_int24 type_handler_int24; Type_handler_longlong type_handler_longlong; Type_handler_longlong type_handler_ulonglong; // Only used for CAST() for now +Type_handler_vers_trx_id type_handler_vers_trx_id; Type_handler_float type_handler_float; Type_handler_double type_handler_double; Type_handler_bit type_handler_bit; @@ -658,7 +659,9 @@ Type_handler_hybrid_field_type::aggregate_for_comparison(const Type_handler *h) Item_result a= cmp_type(); Item_result b= h->cmp_type(); - if (a == STRING_RESULT && b == STRING_RESULT) + if (m_vers_trx_id && (a == STRING_RESULT || b == STRING_RESULT)) + m_type_handler= &type_handler_datetime; + else if (a == STRING_RESULT && b == STRING_RESULT) m_type_handler= &type_handler_long_blob; else if (a == INT_RESULT && b == INT_RESULT) m_type_handler= &type_handler_longlong; @@ -2063,6 +2066,19 @@ Field *Type_handler_longlong::make_table_field(const LEX_CSTRING *name, } +Field *Type_handler_vers_trx_id::make_table_field(const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE *table) const +{ + return new (table->in_use->mem_root) + Field_vers_trx_id(addr.ptr, attr.max_char_length(), + addr.null_ptr, addr.null_bit, + Field::NONE, name, + 0/*zerofill*/, attr.unsigned_flag); +} + + Field *Type_handler_float::make_table_field(const LEX_CSTRING *name, const Record_addr &addr, const Type_all_attributes &attr, diff --git a/sql/sql_type.h b/sql/sql_type.h index 427d59718f6..c400420586e 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -1891,6 +1891,17 @@ public: }; +class Type_handler_vers_trx_id: public Type_handler_longlong +{ +public: + virtual ~Type_handler_vers_trx_id() {} + Field *make_table_field(const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE *table) const; +}; + + class Type_handler_int24: public Type_handler_general_purpose_int { static const Name m_name_mediumint; @@ -2844,14 +2855,16 @@ public: class Type_handler_hybrid_field_type { const Type_handler *m_type_handler; + bool m_vers_trx_id; bool aggregate_for_min_max(const Type_handler *other); + public: Type_handler_hybrid_field_type(); Type_handler_hybrid_field_type(const Type_handler *handler) - :m_type_handler(handler) + :m_type_handler(handler), m_vers_trx_id(false) { } Type_handler_hybrid_field_type(const Type_handler_hybrid_field_type *other) - :m_type_handler(other->m_type_handler) + :m_type_handler(other->m_type_handler), m_vers_trx_id(other->m_vers_trx_id) { } void swap(Type_handler_hybrid_field_type &other) { @@ -2940,6 +2953,7 @@ extern MYSQL_PLUGIN_IMPORT Type_handler_int24 type_handler_int24; extern MYSQL_PLUGIN_IMPORT Type_handler_long type_handler_long; extern MYSQL_PLUGIN_IMPORT Type_handler_longlong type_handler_longlong; extern MYSQL_PLUGIN_IMPORT Type_handler_longlong type_handler_ulonglong; +extern MYSQL_PLUGIN_IMPORT Type_handler_vers_trx_id type_handler_vers_trx_id; extern MYSQL_PLUGIN_IMPORT Type_handler_newdecimal type_handler_newdecimal; extern MYSQL_PLUGIN_IMPORT Type_handler_olddecimal type_handler_olddecimal; diff --git a/sql/sql_union.cc b/sql/sql_union.cc index d987636a2ac..b3a7227678d 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -803,7 +803,7 @@ bool st_select_lex_unit::join_union_item_types(THD *thd_arg, /* Error's in 'new' will be detected after loop */ types.push_back(new (thd_arg->mem_root) Item_type_holder(thd_arg, - &item_tmp->name, + item_tmp, holders[pos].type_handler(), &holders[pos]/*Type_all_attributes*/, holders[pos].get_maybe_null())); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 0b6564d8e49..d14d50267fd 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -44,6 +44,9 @@ // mysql_derived_filling +#include "sql_insert.h" // For vers_insert_history_row() that may be + // needed for System Versioning. + /** True if the table's input and output record buffers are comparable using compare_record(TABLE*). @@ -152,6 +155,23 @@ static bool check_fields(THD *thd, List<Item> &items) return FALSE; } +static bool check_has_vers_fields(TABLE *table, List<Item> &items) +{ + List_iterator<Item> it(items); + if (!table->versioned()) + return false; + + while (Item *item= it++) + { + if (Item_field *item_field= item->field_for_view_update()) + { + Field *field= item_field->field; + if (field->table == table && !field->vers_update_unversioned()) + return true; + } + } + return false; +} /** Re-read record if more columns are needed for error message. @@ -283,6 +303,10 @@ int mysql_update(THD *thd, TABLE_LIST *update_source_table; query_plan.index= MAX_KEY; query_plan.using_filesort= FALSE; + + // For System Versioning (may need to insert new fields to a table). + ha_rows updated_sys_ver= 0; + DBUG_ENTER("mysql_update"); create_explain_query(thd->lex, thd->mem_root); @@ -353,12 +377,16 @@ int mysql_update(THD *thd, { DBUG_RETURN(1); } + bool has_vers_fields= check_has_vers_fields(table, fields); if (check_key_in_view(thd, table_list)) { my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE"); DBUG_RETURN(1); } + if (table->default_field) + table->mark_default_fields_for_write(false); + #ifndef NO_EMBEDDED_ACCESS_CHECKS /* Check values */ table_list->grant.want_privilege= table->grant.want_privilege= @@ -808,6 +836,11 @@ update_begin: THD_STAGE_INFO(thd, stage_updating); while (!(error=info.read_record()) && !thd->killed) { + if (table->versioned() && !table->vers_end_field()->is_max()) + { + continue; + } + explain->tracker.on_record_read(); thd->inc_examined_row_count(1); if (!select || select->skip_record(thd) > 0) @@ -817,10 +850,14 @@ update_begin: explain->tracker.on_record_after_where(); store_record(table,record[1]); + if (fill_record_n_invoke_before_triggers(thd, table, fields, values, 0, TRG_EVENT_UPDATE)) break; /* purecov: inspected */ + if (has_vers_fields && table->versioned()) + table->vers_update_fields(); + found++; if (!can_compare_record || compare_record(table)) @@ -880,19 +917,35 @@ update_begin: else { /* Non-batched update */ - error= table->file->ha_update_row(table->record[1], + error= table->file->ha_update_row(table->record[1], table->record[0]); } - if (!error || error == HA_ERR_RECORD_IS_THE_SAME) - { - if (error != HA_ERR_RECORD_IS_THE_SAME) - updated++; - else - error= 0; - } - else if (!ignore || + if (error == HA_ERR_RECORD_IS_THE_SAME) + { + error= 0; + } + else if (!error) + { + updated++; + + if (has_vers_fields && table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if ((error = vers_insert_history_row(table))) + { + restore_record(table, record[2]); + break; + } + restore_record(table, record[2]); + } + updated_sys_ver++; + } + } + else if (!ignore || table->file->is_fatal_error(error, HA_CHECK_ALL)) - { + { /* If (ignore && error is ignorable) we don't have to do anything; otherwise... @@ -1065,6 +1118,9 @@ update_end: else errcode= query_error_code(thd, killed_status == NOT_KILLED); + ScopedStatementReplication scoped_stmt_rpl( + table->versioned_by_engine() ? thd : NULL); + if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), transactional_table, FALSE, FALSE, errcode)) @@ -1089,9 +1145,15 @@ update_end: if (error < 0 && !thd->lex->analyze_stmt) { char buff[MYSQL_ERRMSG_SIZE]; - my_snprintf(buff, sizeof(buff), ER_THD(thd, ER_UPDATE_INFO), (ulong) found, - (ulong) updated, - (ulong) thd->get_stmt_da()->current_statement_warn_count()); + if (!table->versioned_by_sql()) + my_snprintf(buff, sizeof(buff), ER_THD(thd, ER_UPDATE_INFO), (ulong) found, + (ulong) updated, + (ulong) thd->get_stmt_da()->current_statement_warn_count()); + else + my_snprintf(buff, sizeof(buff), + ER_THD(thd, ER_UPDATE_INFO_WITH_SYSTEM_VERSIONING), + (ulong) found, (ulong) updated, (ulong) updated_sys_ver, + (ulong) thd->get_stmt_da()->current_statement_warn_count()); my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated, id, buff); DBUG_PRINT("info",("%ld records updated", (long) updated)); @@ -1700,8 +1762,10 @@ multi_update::multi_update(THD *thd_arg, TABLE_LIST *table_list, tmp_tables(0), updated(0), found(0), fields(field_list), values(value_list), table_count(0), copy_field(0), handle_duplicates(handle_duplicates_arg), do_update(1), trans_safe(1), - transactional_tables(0), ignore(ignore_arg), error_handled(0), prepared(0) -{} + transactional_tables(0), ignore(ignore_arg), error_handled(0), prepared(0), + updated_sys_ver(0) +{ +} /* @@ -1952,7 +2016,7 @@ static bool safe_update_on_fly(THD *thd, JOIN_TAB *join_tab, return !is_key_used(table, table->s->primary_key, table->write_set); return TRUE; default: - break; // Avoid compler warning + break; // Avoid compiler warning } return FALSE; @@ -2005,6 +2069,7 @@ multi_update::initialize_tables(JOIN *join) if (safe_update_on_fly(thd, join->join_tab, table_ref, all_tables)) { table_to_update= table; // Update table on the fly + has_vers_fields= check_has_vers_fields(table, *fields); continue; } } @@ -2177,6 +2242,11 @@ int multi_update::send_data(List<Item> ¬_used_values) if (table->status & (STATUS_NULL_ROW | STATUS_UPDATED)) continue; + if (table->versioned() && !table->vers_end_field()->is_max()) + { + continue; + } + if (table == table_to_update) { /* @@ -2189,6 +2259,7 @@ int multi_update::send_data(List<Item> ¬_used_values) table->status|= STATUS_UPDATED; store_record(table,record[1]); + if (fill_record_n_invoke_before_triggers(thd, table, *fields_for_table[offset], *values_for_table[offset], 0, @@ -2207,6 +2278,9 @@ int multi_update::send_data(List<Item> ¬_used_values) if (table->default_field && table->update_default_fields(1, ignore)) DBUG_RETURN(1); + if (has_vers_fields && table->versioned()) + table->vers_update_fields(); + if ((error= cur_table->view_check_option(thd, ignore)) != VIEW_CHECK_OK) { @@ -2254,6 +2328,21 @@ int multi_update::send_data(List<Item> ¬_used_values) error= 0; updated--; } + else if (has_vers_fields && table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if (vers_insert_history_row(table)) + { + restore_record(table, record[2]); + error= 1; + break; + } + restore_record(table, record[2]); + } + updated_sys_ver++; + } /* non-transactional or transactional table got modified */ /* either multi_update class' flag is raised in its branch */ if (table->file->has_transactions()) @@ -2280,6 +2369,7 @@ int multi_update::send_data(List<Item> ¬_used_values) */ uint field_num= 0; List_iterator_fast<TABLE> tbl_it(unupdated_check_opt_tables); + /* Set first tbl = table and then tbl to tables from tbl_it */ TABLE *tbl= table; do { @@ -2342,10 +2432,6 @@ void multi_update::abort_result_set() if (do_update && table_count > 1) { /* Add warning here */ - /* - todo/fixme: do_update() is never called with the arg 1. - should it change the signature to become argless? - */ (void) do_updates(); } } @@ -2437,6 +2523,8 @@ int multi_update::do_updates() if (table->vfield) empty_record(table); + has_vers_fields= check_has_vers_fields(table, *fields); + check_opt_it.rewind(); while(TABLE *tbl= check_opt_it++) { @@ -2547,19 +2635,40 @@ int multi_update::do_updates() goto err2; } } - if ((local_error=table->file->ha_update_row(table->record[1], - table->record[0])) && + if (has_vers_fields && table->versioned()) + table->vers_update_fields(); + + if ((local_error=table->file->ha_update_row(table->record[1], + table->record[0])) && local_error != HA_ERR_RECORD_IS_THE_SAME) { if (!ignore || table->file->is_fatal_error(local_error, HA_CHECK_ALL)) { err_table= table; - goto err; + goto err; } - } + } if (local_error != HA_ERR_RECORD_IS_THE_SAME) + { updated++; + + if (has_vers_fields && table->versioned()) + { + if (table->versioned_by_sql()) + { + store_record(table, record[2]); + if ((local_error= vers_insert_history_row(table))) + { + restore_record(table, record[2]); + err_table = table; + goto err; + } + restore_record(table, record[2]); + } + updated_sys_ver++; + } + } else local_error= 0; } @@ -2675,9 +2784,21 @@ bool multi_update::send_eof() thd->clear_error(); else errcode= query_error_code(thd, killed_status == NOT_KILLED); - if (thd->binlog_query(THD::ROW_QUERY_TYPE, - thd->query(), thd->query_length(), - transactional_tables, FALSE, FALSE, errcode)) + + bool force_stmt= false; + for (TABLE *table= all_tables->table; table; table= table->next) + { + if (table->versioned_by_engine()) + { + force_stmt= true; + break; + } + } + ScopedStatementReplication scoped_stmt_rpl(force_stmt ? thd : NULL); + + if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), + thd->query_length(), transactional_tables, FALSE, + FALSE, errcode)) { local_error= 1; // Rollback update } diff --git a/sql/sql_view.cc b/sql/sql_view.cc index dd8c6b70446..6ea4633385b 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -455,6 +455,136 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, goto err; } + for (SELECT_LEX *sl= select_lex; sl; sl= sl->next_select()) + { /* System Versioning: fix system fields of versioned view */ + // Similar logic as in mysql_derived_prepare() + // Leading versioning table detected implicitly (first one selected) + TABLE_LIST *impli_table= NULL; + // Leading versioning table specified explicitly + // (i.e. if at least one system field is selected) + TABLE_LIST *expli_table= NULL; + LEX_CSTRING impli_start, impli_end; + Item_field *expli_start= NULL, *expli_end= NULL; + + for (TABLE_LIST *table= tables; table; table= table->next_local) + { + DBUG_ASSERT(!table->is_view() || table->view); + + // Any versioned table in VIEW will add `FOR SYSTEM_TIME ALL` + WHERE: + // if there are at least one versioned table then VIEW will contain FOR_SYSTEM_TIME_ALL + // (because it is in fact LEX used to parse its SELECT). + if (table->is_view() && table->view->vers_conditions == FOR_SYSTEM_TIME_ALL) + { + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW `%s` is prohibited: versioned VIEW `%s` in query!", MYF(0), + view->table_name, + table->table_name); + res= true; + goto err; + } + + if (!table->table || !table->table->versioned()) + continue; + + const LString_i table_start= table->table->vers_start_field()->field_name; + const LString_i table_end= table->table->vers_end_field()->field_name; + + if (!impli_table) + { + impli_table= table; + impli_start= table_start; + impli_end= table_end; + } + + /* Implicitly add versioning fields if needed */ + Item *item; + List_iterator_fast<Item> it(sl->item_list); + + DBUG_ASSERT(table->alias); + while ((item= it++)) + { + if (item->real_item()->type() != Item::FIELD_ITEM) + continue; + Item_field *fld= (Item_field*) item->real_item(); + if (fld->table_name && 0 != my_strcasecmp(table_alias_charset, table->alias, fld->table_name)) + continue; + DBUG_ASSERT(fld->field_name.str); + if (table_start == fld->field_name) + { + if (expli_start) + { + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW `%s` is prohibited: multiple start system fields `%s.%s`, `%s.%s` in query!", MYF(0), + view->table_name, + expli_table->alias, + expli_start->field_name.str, + table->alias, + fld->field_name.str); + res= true; + goto err; + } + if (expli_table) + { + if (expli_table != table) + { +expli_table_err: + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW `%s` is prohibited: system fields from multiple tables `%s`, `%s` in query!", MYF(0), + view->table_name, + expli_table->alias, + table->alias); + res= true; + goto err; + } + } + else + expli_table= table; + expli_start= fld; + impli_end= table_end; + } + else if (table_end == fld->field_name) + { + if (expli_end) + { + my_printf_error( + ER_VERS_VIEW_PROHIBITED, + "Creating VIEW `%s` is prohibited: multiple end system fields `%s.%s`, `%s.%s` in query!", MYF(0), + view->table_name, + expli_table->alias, + expli_end->field_name.str, + table->alias, + fld->field_name.str); + res= true; + goto err; + } + if (expli_table) + { + if (expli_table != table) + goto expli_table_err; + } + else + expli_table= table; + expli_end= fld; + impli_start= table_start; + } + } // while ((item= it++)) + } // for (TABLE_LIST *table) + + if (expli_table) + impli_table= expli_table; + + if (impli_table) + { + if (!expli_start && sl->vers_push_field(thd, impli_table, impli_start)) + goto err; + if (!expli_end && sl->vers_push_field(thd, impli_table, impli_end)) + goto err; + } + } /* System Versioning end */ + view= lex->unlink_first_table(&link_to_local); if (check_db_dir_existence(view->db)) @@ -607,14 +737,22 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, view->table_name, item->name.str) & VIEW_ANY_ACL); - if (fld && !fld->field->table->s->tmp_table) + if (!fld) + continue; + TABLE_SHARE *s= fld->field->table->s; + const LString_i field_name= fld->field->field_name; + if (s->tmp_table || + (s->versioned && + (field_name == s->vers_start_field()->field_name || + field_name == s->vers_end_field()->field_name))) { + continue; + } - final_priv&= fld->have_privileges; + final_priv&= fld->have_privileges; - if (~fld->have_privileges & priv) - report_item= item; - } + if (~fld->have_privileges & priv) + report_item= item; } } @@ -2016,7 +2154,7 @@ bool check_key_in_view(THD *thd, TABLE_LIST *view) RETURN FALSE OK - TRUE error (is not sent to cliet) + TRUE error (is not sent to client) */ bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view) @@ -2033,7 +2171,15 @@ bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view) { Item_field *fld; if ((fld= entry->item->field_for_view_update())) + { + TABLE_SHARE *s= fld->context->table_list->table->s; + LString_i field_name= fld->field_name; + if (s->versioned && + (field_name == s->vers_start_field()->field_name || + field_name == s->vers_end_field()->field_name)) + continue; list->push_back(fld, thd->mem_root); + } else { my_error(ER_NON_INSERTABLE_TABLE, MYF(0), view->alias, "INSERT"); @@ -2044,7 +2190,7 @@ bool insert_view_fields(THD *thd, List<Item> *list, TABLE_LIST *view) } /* - checking view md5 check suum + checking view md5 check sum SINOPSYS view_checksum() diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index b2227bdbbbf..6e34e1974a1 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -68,6 +68,7 @@ #include "sql_lex.h" #include "sql_sequence.h" #include "sql_tvc.h" +#include "vers_utils.h" /* this is to get the bison compilation windows warnings out */ #ifdef _MSC_VER @@ -738,6 +739,32 @@ bool LEX::set_bincmp(CHARSET_INFO *cs, bool bin) MYSQL_YYABORT; \ } while(0) + +inline void vers_select_conds_t::init( + vers_range_type_t t, + vers_range_unit_t u_start= UNIT_AUTO, + Item * s= NULL, + vers_range_unit_t u_end= UNIT_AUTO, + Item * e= NULL) +{ + type= t; + unit_start= u_start; + unit_end= u_end; + start= fix_dec(s); + end= fix_dec(e); + import_outer= from_inner= false; +} + +inline Item *vers_select_conds_t::fix_dec(Item *item) +{ + if (item && item->decimals == 0 && item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype() == Item_func::NOW_FUNC) + item->decimals= 6; + + return item; +} + + Virtual_column_info *add_virtual_expression(THD *thd, Item *expr) { Virtual_column_info *v= new (thd->mem_root) Virtual_column_info(); @@ -857,6 +884,8 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr) enum Window_frame::Frame_exclusion frame_exclusion; enum trigger_order_type trigger_action_order_type; DDL_options_st object_ddl_options; + enum vers_range_unit_t vers_range_unit; + enum Column_definition::enum_column_versioning vers_column_versioning; } %{ @@ -867,15 +896,16 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %parse-param { THD *thd } %lex-param { THD *thd } /* - Currently there are 104 shift/reduce conflicts. + Currently there are 125 shift/reduce conflicts. We should not introduce new conflicts any more. */ -%expect 104 +%expect 125 /* Comments for TOKENS. For each token, please include in the same line a comment that contains the following tags: + SQL-2011-R : Reserved keyword as per SQL-2011 SQL-2011-N : Non Reserved keyword as per SQL-2011 SQL-2003-R : Reserved keyword as per SQL-2003 SQL-2003-N : Non Reserved keyword as per SQL-2003 @@ -1100,6 +1130,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FORCE_SYM %token FOREIGN /* SQL-2003-R */ %token FOR_SYM /* SQL-2003-R */ +%token FOR_SYSTEM_TIME_SYM /* INTERNAL */ %token FORMAT_SYM %token FOUND_SYM /* SQL-2003-R */ %token FROM @@ -1333,6 +1364,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token PERCENT_RANK_SYM %token PERCENTILE_CONT_SYM %token PERCENTILE_DISC_SYM +%token PERIOD_SYM /* SQL-2011-R */ %token PERSISTENT_SYM %token PHASE_SYM %token PLUGINS_SYM @@ -1499,6 +1531,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SWAPS_SYM %token SWITCHES_SYM %token SYSDATE +%token SYSTEM /* SQL-2011-R */ +%token SYSTEM_TIME_SYM /* SQL-2011-R */ %token TABLES %token TABLESPACE %token TABLE_REF_PRIORITY @@ -1569,6 +1603,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token VARIANCE_SYM %token VARYING /* SQL-2003-R */ %token VAR_SAMP_SYM +%token VERSIONING_SYM /* SQL-2011-R */ %token VIA_SYM %token VIEW_SYM /* SQL-2003-N */ %token VIRTUAL_SYM @@ -1582,8 +1617,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WHILE_SYM %token WITH /* SQL-2003-R */ %token WITHIN +%token WITHOUT /* SQL-2003-R */ %token WITH_CUBE_SYM /* INTERNAL */ %token WITH_ROLLUP_SYM /* INTERNAL */ +%token WITH_SYSTEM_SYM /* INTERNAL */ %token WORK_SYM /* SQL-2003-N */ %token WRAPPER_SYM %token WRITE_SYM /* SQL-2003-N */ @@ -1689,7 +1726,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); optional_flush_tables_arguments opt_time_precision kill_type kill_option int_num opt_default_time_precision - case_stmt_body opt_bin_mod + case_stmt_body opt_bin_mod opt_for_system_time_clause opt_if_exists_table_element opt_if_not_exists_table_element opt_recursive @@ -1919,6 +1956,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); keep_gcc_happy key_using_alg part_column_list + period_for_system_time server_def server_options_list server_option definer_opt no_definer definer get_diagnostics parse_vcol_expr vcol_opt_specifier vcol_opt_attribute @@ -1927,6 +1965,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); explainable_command opt_lock_wait_timeout opt_delete_gtid_domain + asrow_attribute END_OF_INPUT %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt @@ -1947,6 +1986,7 @@ END_OF_INPUT %type <num> sp_decl_idents sp_decl_idents_init_vars %type <num> sp_handler_type sp_hcond_list +%type <num> start_or_end %type <spcondvalue> sp_cond sp_hcond sqlstate signal_value opt_signal_value %type <spblock> sp_decls sp_decl sp_decl_body sp_decl_variable_list %type <spname> sp_name @@ -1984,7 +2024,6 @@ END_OF_INPUT %type <frame_exclusion> opt_window_frame_exclusion; %type <window_frame_bound> window_frame_start window_frame_bound; - %type <NONE> '-' '+' '*' '/' '%' '(' ')' ',' '!' '{' '}' '&' '|' AND_SYM OR_SYM OR_OR_SYM BETWEEN_SYM CASE_SYM @@ -1997,6 +2036,8 @@ END_OF_INPUT %type <lex_str_list> opt_with_column_list +%type <vers_range_unit> opt_trans_or_timestamp +%type <vers_column_versioning> with_or_without_system %% @@ -4996,7 +5037,7 @@ create_like: opt_create_select: /* empty */ {} - | opt_duplicate opt_as create_select_query_expression + | opt_duplicate opt_as create_select_query_expression opt_versioning_option ; create_select_query_expression: @@ -5143,6 +5184,10 @@ part_type_def: } | LIST_SYM part_column_list { Lex->part_info->part_type= LIST_PARTITION; } + | SYSTEM_TIME_SYM + { if (Lex->part_info->vers_init_info(thd)) MYSQL_YYABORT; } + opt_versioning_interval + opt_versioning_limit ; opt_linear: @@ -5350,6 +5395,7 @@ part_definition: MYSQL_YYABORT; } p_elem->part_state= PART_NORMAL; + p_elem->id= part_info->partitions.elements - 1; part_info->curr_part_elem= p_elem; part_info->current_partition= p_elem; part_info->use_default_partitions= FALSE; @@ -5418,6 +5464,62 @@ opt_part_values: part_info->part_type= LIST_PARTITION; } part_values_in {} + | AS OF_SYM NOW_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + partition_element *elem= part_info->curr_part_elem; + if (! lex->is_partition_management()) + { + if (part_info->part_type != VERSIONING_PARTITION) + my_yyabort_error((ER_PARTITION_WRONG_TYPE, MYF(0), + "BY SYSTEM_TIME")); + } + else + { + DBUG_ASSERT(Lex->create_last_non_select_table); + DBUG_ASSERT(Lex->create_last_non_select_table->table_name); + // FIXME: other ALTER commands? + my_yyabort_error((ER_VERS_WRONG_PARTS, MYF(0), + Lex->create_last_non_select_table->table_name)); + } + elem->type(partition_element::AS_OF_NOW); + DBUG_ASSERT(part_info->vers_info); + part_info->vers_info->now_part= elem; + if (part_info->init_column_part(thd)) + { + MYSQL_YYABORT; + } + } + | VERSIONING_SYM + { + LEX *lex= Lex; + partition_info *part_info= lex->part_info; + partition_element *elem= part_info->curr_part_elem; + if (! lex->is_partition_management()) + { + if (part_info->part_type != VERSIONING_PARTITION) + my_yyabort_error((ER_PARTITION_WRONG_TYPE, MYF(0), + "BY SYSTEM_TIME")); + } + else + { + part_info->vers_init_info(thd); + elem->id= UINT32_MAX; + } + DBUG_ASSERT(part_info->vers_info); + if (part_info->vers_info->now_part) + { + DBUG_ASSERT(Lex->create_last_non_select_table); + DBUG_ASSERT(Lex->create_last_non_select_table->table_name); + my_yyabort_error((ER_VERS_WRONG_PARTS, MYF(0), Lex->create_last_non_select_table->table_name)); + } + elem->type(partition_element::VERSIONING); + if (part_info->init_column_part(thd)) + { + MYSQL_YYABORT; + } + } | DEFAULT { LEX *lex= Lex; @@ -5663,6 +5765,7 @@ sub_part_definition: mem_alloc_error(sizeof(partition_element)); MYSQL_YYABORT; } + sub_p_elem->id= curr_part->subpartitions.elements - 1; part_info->curr_part_elem= sub_p_elem; part_info->use_default_subpartitions= FALSE; part_info->use_default_num_subpartitions= FALSE; @@ -5719,6 +5822,38 @@ opt_part_option: { Lex->part_info->curr_part_elem->part_comment= $3.str; } ; +opt_versioning_interval: + /* empty */ {} + | INTERVAL_SYM expr interval + { + partition_info *part_info= Lex->part_info; + DBUG_ASSERT(part_info->part_type == VERSIONING_PARTITION); + INTERVAL interval; + if (get_interval_value($2, $3, &interval) || + part_info->vers_set_interval(interval)) + { + my_error(ER_PART_WRONG_VALUE, MYF(0), + Lex->create_last_non_select_table->table_name, "INTERVAL"); + MYSQL_YYABORT; + } + } + ; + +opt_versioning_limit: + /* empty */ {} + | LIMIT ulonglong_num + { + partition_info *part_info= Lex->part_info; + DBUG_ASSERT(part_info->part_type == VERSIONING_PARTITION); + if (part_info->vers_set_limit($2)) + { + my_error(ER_PART_WRONG_VALUE, MYF(0), + Lex->create_last_non_select_table->table_name, "LIMIT"); + MYSQL_YYABORT; + } + } + ; + /* End of partition parser part */ @@ -6085,6 +6220,32 @@ create_table_option: { Lex->create_info.used_fields|= HA_CREATE_USED_SEQUENCE; Lex->create_info.sequence= ($3 == HA_CHOICE_YES); + } + | versioning_option + ; + +opt_versioning_option: + /* empty */ + | versioning_option + ; + +versioning_option: + WITH_SYSTEM_SYM VERSIONING_SYM + { + if (Lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) + { + if (!thd->variables.vers_force) + { + my_error(ER_WRONG_USAGE, MYF(0), + "TEMPORARY", "WITH SYSTEM VERSIONING"); + MYSQL_YYABORT; + } + } + else + { + Lex->vers_get_info().with_system_versioning= true; + Lex->create_info.options|= HA_VERSIONED_TABLE; + } } ; @@ -6185,6 +6346,7 @@ field_list_item: column_def { } | key_def | constraint_def + | period_for_system_time ; column_def: @@ -6284,6 +6446,15 @@ constraint_def: } ; +period_for_system_time: + // If FOR_SYM is followed by SYSTEM_TIME_SYM then they are merged to: FOR_SYSTEM_TIME_SYM . + PERIOD_SYM FOR_SYSTEM_TIME_SYM '(' ident ',' ident ')' + { + Vers_parse_info &info= Lex->vers_get_info(); + info.set_period_for_system_time($4, $6); + } + ; + opt_check_constraint: /* empty */ { $$= (Virtual_column_info*) 0; } | check_constraint { $$= $1;} @@ -6369,6 +6540,15 @@ opt_serial_attribute_list: | serial_attribute ; +opt_asrow_attribute: + /* empty */ {} + | opt_asrow_attribute_list {} + ; + +opt_asrow_attribute_list: + opt_asrow_attribute_list asrow_attribute {} + | asrow_attribute + ; field_def: opt_attribute @@ -6378,6 +6558,46 @@ field_def: Lex->last_field->flags&= ~NOT_NULL_FLAG; // undo automatic NOT NULL for timestamps } vcol_opt_specifier vcol_opt_attribute + | opt_generated_always AS ROW_SYM start_or_end opt_asrow_attribute + { + LEX *lex= Lex; + Vers_parse_info &info= lex->vers_get_info(); + const LEX_CSTRING &field_name= lex->last_field->field_name; + + LString_i *p; + switch ($4) + { + case 1: + p= &info.as_row.start; + if (*p) + { + my_yyabort_error((ER_VERS_DUPLICATE_ROW_START_END, MYF(0), + "START", field_name.str)); + } + lex->last_field->flags|= VERS_SYS_START_FLAG; + break; + case 0: + p= &info.as_row.end; + if (*p) + { + my_yyabort_error((ER_VERS_DUPLICATE_ROW_START_END, MYF(0), + "END", field_name.str)); + } + lex->last_field->flags|= VERS_SYS_END_FLAG; + break; + default: + /* Not Reachable */ + MYSQL_YYABORT; + break; + } + DBUG_ASSERT(p); + *p= field_name; + } + ; + +start_or_end: + START_SYM { $$ = 1; } + | END { $$ = 0; } ; opt_generated_always: @@ -6837,15 +7057,34 @@ opt_compression_method: | equal ident { $$= $2.str; } ; -serial_attribute: - not NULL_SYM { Lex->last_field->flags|= NOT_NULL_FLAG; } +asrow_attribute: + not NULL_SYM + { + Lex->last_field->flags|= NOT_NULL_FLAG; + } | opt_primary KEY_SYM { LEX *lex=Lex; lex->last_field->flags|= PRI_KEY_FLAG | NOT_NULL_FLAG; lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; } - | vcol_attribute + | UNIQUE_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= UNIQUE_KEY_FLAG; + lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; + } + | UNIQUE_SYM KEY_SYM + { + LEX *lex=Lex; + lex->last_field->flags|= UNIQUE_KEY_FLAG; + lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; + } + | COMMENT_SYM TEXT_STRING_sys { Lex->last_field->comment= $2; } + ; + +serial_attribute: + asrow_attribute | IDENT_sys equal TEXT_STRING_sys { if ($3.length > ENGINE_OPTION_MAX_LENGTH) @@ -6873,6 +7112,24 @@ serial_attribute: new (thd->mem_root) engine_option_value($1, &Lex->last_field->option_list, &Lex->option_list_last); } + | with_or_without_system VERSIONING_SYM + { + Lex->last_field->versioning= $1; + Lex->create_info.options|= HA_VERSIONED_TABLE; + } + ; + +with_or_without_system: + WITH_SYSTEM_SYM + { + Lex->create_info.vers_info.versioned_fields= true; + $$= Column_definition::WITH_VERSIONING; + } + | WITHOUT SYSTEM + { + Lex->create_info.vers_info.unversioned_fields= true; + $$= Column_definition::WITHOUT_VERSIONING; + } ; @@ -7831,6 +8088,9 @@ alter_list_item: Lex->create_last_non_select_table= Lex->last_table(); Lex->alter_info.flags|= Alter_info::ALTER_ADD_INDEX; } + | ADD period_for_system_time + { + } | add_column '(' create_field_list ')' { Lex->alter_info.flags|= Alter_info::ALTER_ADD_COLUMN | @@ -7995,6 +8255,15 @@ alter_list_item: } | alter_algorithm_option | alter_lock_option + | ADD SYSTEM VERSIONING_SYM + { + Lex->vers_get_info().with_system_versioning= true; + Lex->create_info.options|= HA_VERSIONED_TABLE; + } + | DROP SYSTEM VERSIONING_SYM + { + Lex->vers_get_info().without_system_versioning= true; + } ; opt_index_lock_algorithm: @@ -8873,6 +9142,7 @@ table_expression: opt_group_clause opt_having_clause opt_window_clause + opt_system_time_clause ; opt_table_expression: @@ -8907,6 +9177,78 @@ select_options: } ; +opt_trans_or_timestamp: + /* empty */ + { + $$ = UNIT_AUTO; + } + | TRANSACTION_SYM + { + $$ = UNIT_TRX_ID; + } + | TIMESTAMP + { + $$ = UNIT_TIMESTAMP; + } + ; + +opt_system_time_clause: + /* empty */ + {} + | FOR_SYSTEM_TIME_SYM system_time_expr + { + DBUG_ASSERT(Select); + int used= 0; + if (Lex->vers_conditions) + { + for (TABLE_LIST *table= Select->table_list.first; table; table= table->next_local) + { + if (!table->vers_conditions) + { + table->vers_conditions= Lex->vers_conditions; + used++; + } + } + if (!used) + { + my_yyabort_error((ER_VERS_UNUSED_CLAUSE, MYF(0), "SYSTEM_TIME")); + } + } + } + ; + +opt_for_system_time_clause: + /* empty */ + { + $$= false; + } + | FOR_SYSTEM_TIME_SYM system_time_expr + { + $$= true; + } + ; + +system_time_expr: + AS OF_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_AS_OF, $3, $4); + } + | ALL + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_ALL); + } + | FROM opt_trans_or_timestamp simple_expr + TO_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_FROM_TO, $2, $3, $5, $6); + } + | BETWEEN_SYM opt_trans_or_timestamp simple_expr + AND_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_BETWEEN, $2, $3, $5, $6); + } + ; + select_option_list: select_option_list select_option | select_option @@ -11426,12 +11768,13 @@ table_factor: table_primary_ident: { + DBUG_ASSERT(Select); SELECT_LEX *sel= Select; sel->table_join_options= 0; } - table_ident opt_use_partition opt_table_alias opt_key_definition + table_ident opt_use_partition opt_for_system_time_clause opt_table_alias opt_key_definition { - if (!($$= Select->add_table_to_list(thd, $2, $4, + if (!($$= Select->add_table_to_list(thd, $2, $5, Select->get_table_join_options(), YYPS->m_lock_type, YYPS->m_mdl_type, @@ -11439,6 +11782,8 @@ table_primary_ident: $3))) MYSQL_YYABORT; Select->add_joined_table($$); + if ($4) + $$->vers_conditions= Lex->vers_conditions; } ; @@ -11461,11 +11806,11 @@ table_primary_ident: */ table_primary_derived: - '(' get_select_lex select_derived_union ')' opt_table_alias + '(' get_select_lex select_derived_union ')' opt_for_system_time_clause opt_table_alias { /* Use $2 instead of Lex->current_select as derived table will alter value of Lex->current_select. */ - if (!($3 || $5) && $2->embedding && + if (!($3 || $6) && $2->embedding && !$2->embedding->nested_join->join_list.elements) { /* we have a derived table ($3 == NULL) but no alias, @@ -11488,7 +11833,7 @@ table_primary_derived: if (ti == NULL) MYSQL_YYABORT; if (!($$= sel->add_table_to_list(thd, - ti, $5, 0, + ti, $6, 0, TL_READ, MDL_SHARED_READ))) MYSQL_YYABORT; @@ -11496,7 +11841,7 @@ table_primary_derived: lex->pop_context(); lex->nest_level--; } - else if ($5 != NULL) + else if ($6 != NULL) { /* Tables with or without joins within parentheses cannot @@ -11520,11 +11865,13 @@ table_primary_derived: if ($$ && $$->derived && !$$->derived->first_select()->next_select()) $$->select_lex->add_where_field($$->derived->first_select()); + if ($5) + $$->vers_conditions= Lex->vers_conditions; } /* Represents derived table with WITH clause */ | '(' get_select_lex subselect_start with_clause query_expression_body - subselect_end ')' opt_table_alias + subselect_end ')' opt_for_system_time_clause opt_table_alias { LEX *lex=Lex; SELECT_LEX *sel= $2; @@ -11535,10 +11882,12 @@ table_primary_derived: $5->set_with_clause($4); lex->current_select= sel; if (!($$= sel->add_table_to_list(lex->thd, - ti, $8, 0, + ti, $9, 0, TL_READ, MDL_SHARED_READ))) MYSQL_YYABORT; sel->add_joined_table($$); + if ($8) + $$->vers_conditions= Lex->vers_conditions; } ; @@ -13182,8 +13531,17 @@ opt_delete_option: | IGNORE_SYM { Lex->ignore= 1; } ; +truncate_end: + opt_lock_wait_timeout + | TO_SYM SYSTEM_TIME_SYM opt_trans_or_timestamp simple_expr + { + Lex->vers_conditions.init(FOR_SYSTEM_TIME_BEFORE, $3, $4); + Lex->last_table()->vers_conditions= Lex->vers_conditions; + } + ; + truncate: - TRUNCATE_SYM opt_table_sym + TRUNCATE_SYM { LEX* lex= Lex; lex->sql_command= SQLCOM_TRUNCATE; @@ -13194,7 +13552,7 @@ truncate: YYPS->m_lock_type= TL_WRITE; YYPS->m_mdl_type= MDL_EXCLUSIVE; } - table_name opt_lock_wait_timeout + opt_table_sym table_name truncate_end { LEX* lex= thd->lex; DBUG_ASSERT(!lex->m_sql_cmd); @@ -13489,13 +13847,21 @@ show_param: Lex->set_command(SQLCOM_SHOW_CREATE_DB, $3); Lex->name= $4; } - | CREATE TABLE_SYM table_ident + | CREATE TABLE_SYM table_ident opt_for_system_time_clause { LEX *lex= Lex; lex->sql_command = SQLCOM_SHOW_CREATE; if (!lex->select_lex.add_table_to_list(thd, $3, NULL,0)) MYSQL_YYABORT; lex->create_info.storage_media= HA_SM_DEFAULT; + + if (lex->vers_conditions.type != FOR_SYSTEM_TIME_UNSPECIFIED && + lex->vers_conditions.type != FOR_SYSTEM_TIME_AS_OF) + { + my_yyabort_error((ER_VERS_RANGE_PROHIBITED, MYF(0))); + } + if ($4) + Lex->last_table()->vers_conditions= Lex->vers_conditions; } | CREATE VIEW_SYM table_ident { @@ -14961,6 +15327,7 @@ keyword_alias: | OTHERS_SYM {} | OWNER_SYM {} | PARSER_SYM {} + | PERIOD_SYM {} | PORT_SYM {} | PRECEDES_SYM {} | PRECEDING_SYM {} @@ -15343,6 +15710,8 @@ keyword_sp_not_data_type: | SUSPEND_SYM {} | SWAPS_SYM {} | SWITCHES_SYM {} + | SYSTEM {} + | SYSTEM_TIME_SYM {} | TABLE_NAME_SYM {} | TABLES {} | TABLE_CHECKSUM_SYM {} @@ -15368,6 +15737,7 @@ keyword_sp_not_data_type: | USER_SYM {} | USE_FRM {} | VARIABLES {} + | VERSIONING_SYM {} | VIEW_SYM {} | VIRTUAL_SYM {} | VALUE_SYM {} @@ -15375,6 +15745,7 @@ keyword_sp_not_data_type: | WAIT_SYM {} | WEEK_SYM {} | WEIGHT_STRING_SYM {} + | WITHOUT {} | WORK_SYM {} | X509_SYM {} | XML_SYM {} @@ -15804,6 +16175,12 @@ set_expr_or_default: if ($$ == NULL) MYSQL_YYABORT; } + | DROP + { + $$=new (thd->mem_root) Item_string_sys(thd, "DROP", 4); + if ($$ == NULL) + MYSQL_YYABORT; + } ; /* Lock function */ @@ -16186,6 +16563,7 @@ object_privilege: | EVENT_SYM { Lex->grant |= EVENT_ACL;} | TRIGGER_SYM { Lex->grant |= TRIGGER_ACL; } | CREATE TABLESPACE { Lex->grant |= CREATE_TABLESPACE_ACL; } + | DELETE_SYM VERSIONING_SYM ROWS_SYM { Lex->grant |= DELETE_VERSIONING_ROWS_ACL; } ; opt_and: @@ -16965,9 +17343,14 @@ trigger_tail: } table_ident /* $10 */ FOR_SYM - remember_name /* $12 */ - { /* $13 */ - Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start(); + remember_name /* $13 */ + { /* $14 */ + /* + FOR token is already passed through (see 'case FOR_SYM' in sql_lex.cc), + so we use _prev() to get it back. + */ + DBUG_ASSERT(YYLIP->lookahead_token >= 0); + Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start_prev(); } EACH_SYM ROW_SYM diff --git a/sql/sql_yacc_ora.yy b/sql/sql_yacc_ora.yy index 54c27783b0a..3ea1dc25595 100644 --- a/sql/sql_yacc_ora.yy +++ b/sql/sql_yacc_ora.yy @@ -508,6 +508,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token FORCE_SYM %token FOREIGN /* SQL-2003-R */ %token FOR_SYM /* SQL-2003-R */ +%token FOR_SYSTEM_TIME_SYM /* INTERNAL */ %token FORMAT_SYM %token FOUND_SYM /* SQL-2003-R */ %token FROM @@ -741,6 +742,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token PERCENT_RANK_SYM %token PERCENTILE_CONT_SYM %token PERCENTILE_DISC_SYM +%token PERIOD_SYM /* 32N2439 */ %token PERSISTENT_SYM %token PHASE_SYM %token PLUGINS_SYM @@ -907,6 +909,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token SWAPS_SYM %token SWITCHES_SYM %token SYSDATE +%token SYSTEM /* 32N2439 */ +%token SYSTEM_TIME_SYM /* 32N2439 */ %token TABLES %token TABLESPACE %token TABLE_REF_PRIORITY @@ -977,6 +981,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token VARIANCE_SYM %token VARYING /* SQL-2003-R */ %token VAR_SAMP_SYM +%token VERSIONING_SYM /* 32N2439 */ %token VIA_SYM %token VIEW_SYM /* SQL-2003-N */ %token VIRTUAL_SYM @@ -990,8 +995,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WHILE_SYM %token WITH /* SQL-2003-R */ %token WITHIN +%token WITHOUT /* SQL-2003-R */ %token WITH_CUBE_SYM /* INTERNAL */ %token WITH_ROLLUP_SYM /* INTERNAL */ +%token WITH_SYSTEM_SYM /* INTERNAL */ %token WORK_SYM /* SQL-2003-N */ %token WRAPPER_SYM %token WRITE_SYM /* SQL-2003-N */ diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index c67be1dbaf8..adef93ef205 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -387,6 +387,55 @@ static Sys_var_charptr Sys_my_bind_addr( READ_ONLY GLOBAL_VAR(my_bind_addr_str), CMD_LINE(REQUIRED_ARG), IN_FS_CHARSET, DEFAULT(0)); +const char *Sys_var_vers_asof::asof_keywords[]= {"CURRENT", "ALL", NULL}; +static Sys_var_vers_asof Sys_vers_asof_timestamp( + "versioning_asof_timestamp", "Default AS OF value for versioned queries", + SESSION_VAR(vers_asof_timestamp.type), NO_CMD_LINE, + Sys_var_vers_asof::asof_keywords, DEFAULT(FOR_SYSTEM_TIME_UNSPECIFIED)); + +static Sys_var_mybool Sys_vers_force( + "versioning_force", "Force system versioning for all created tables", + SESSION_VAR(vers_force), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static const char *vers_hide_keywords[]= {"AUTO", "IMPLICIT", "FULL", "NEVER", NULL}; +static Sys_var_enum Sys_vers_hide( + "versioning_hide", "Hide system versioning from being displayed in table info. " + "AUTO: hide implicit system fields only in non-versioned and AS OF queries; " + "IMPLICIT: hide implicit system fields in all queries; " + "FULL: hide any system fields in all queries and hide versioning info in SHOW commands; " + "NEVER: don't hide system fields", + SESSION_VAR(vers_hide), CMD_LINE(REQUIRED_ARG), + vers_hide_keywords, DEFAULT(VERS_HIDE_AUTO)); + +static Sys_var_mybool Sys_vers_innodb_algorithm_simple( + "versioning_innodb_algorithm_simple", + "Use simple algorithm of timestamp handling in InnoDB instead of TRX_SEES", + SESSION_VAR(vers_innodb_algorithm_simple), CMD_LINE(OPT_ARG), + DEFAULT(TRUE)); + +static const char *vers_alter_history_keywords[]= {"ERROR", "KEEP",/* "SURVIVE", "DROP",*/ NULL}; +static Sys_var_enum Sys_vers_alter_history( + "versioning_alter_history", "Versioning ALTER TABLE mode. " + "ERROR: Fail ALTER with error; " /* TODO: fail only when history non-empty */ + "KEEP: Keep historical system rows and subject them to ALTER; " + /*"SURVIVE: Keep historical system rows intact; " + "DROP: Drop historical system rows while processing ALTER"*/, + SESSION_VAR(vers_alter_history), CMD_LINE(REQUIRED_ARG), + vers_alter_history_keywords, DEFAULT(VERS_ALTER_HISTORY_ERROR)); + +static bool update_transaction_registry(sys_var *self, THD *thd, enum_var_type type) +{ + use_transaction_registry= opt_transaction_registry; + return false; +} + +static Sys_var_mybool Sys_transaction_registry( + "transaction_registry", + "Enable or disable update of transaction_registry", + GLOBAL_VAR(opt_transaction_registry), CMD_LINE(OPT_ARG), + DEFAULT(TRUE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + 0, ON_UPDATE(update_transaction_registry)); + static Sys_var_ulonglong Sys_binlog_cache_size( "binlog_cache_size", "The size of the transactional cache for " "updates to transactional engines for the binary log. " diff --git a/sql/sys_vars.ic b/sql/sys_vars.ic index 706240727c5..a2ea75d0dc2 100644 --- a/sql/sys_vars.ic +++ b/sql/sys_vars.ic @@ -77,6 +77,11 @@ #define GET_HA_ROWS GET_ULONG #endif +// Disable warning caused by SESSION_VAR() macro +#ifdef __clang__ +#pragma clang diagnostic ignored "-Winvalid-offsetof" +#endif + /* special assert for sysvars. Tells the name of the variable, and fails even in non-debug builds. @@ -2597,3 +2602,87 @@ public: bool global_update(THD *thd, set_var *var); uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base); }; + + +class Sys_var_vers_asof: public Sys_var_enum +{ +public: + static const char *asof_keywords[]; + +public: + Sys_var_vers_asof(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, const char *values[], + uint def_val) + : Sys_var_enum(name_arg, comment, flag_args, off, size, + getopt, values, def_val) + { + // setval() accepts string rather enum + option.var_type= GET_STR; + } + virtual bool do_check(THD *thd, set_var *var) + { + if (!Sys_var_enum::do_check(thd, var)) + return false; + MYSQL_TIME ltime; + bool res= var->value->get_date(<ime, 0); + if (!res) + { + var->save_result.ulonglong_value= FOR_SYSTEM_TIME_AS_OF; + } + return res; + } + +private: + bool update(set_var *var, st_vers_asof_timestamp &out) + { + bool res= false; + out.type= static_cast<enum_var_type>(var->save_result.ulonglong_value); + if (out.type == FOR_SYSTEM_TIME_AS_OF) + { + res= var->value->get_date(&out.ltime, 0); + } + return res; + } + +public: + virtual bool global_update(THD *thd, set_var *var) + { + return update(var, global_var(st_vers_asof_timestamp)); + } + virtual bool session_update(THD *thd, set_var *var) + { + return update(var, session_var(thd, st_vers_asof_timestamp)); + } + +private: + uchar *value_ptr(THD *thd, st_vers_asof_timestamp &val) + { + switch (val.type) + { + case FOR_SYSTEM_TIME_UNSPECIFIED: + case FOR_SYSTEM_TIME_ALL: + return (uchar*) thd->strdup(asof_keywords[val.type]); + case FOR_SYSTEM_TIME_AS_OF: + { + uchar *buf= (uchar*) thd->alloc(MAX_DATE_STRING_REP_LENGTH); + if (buf &&!my_datetime_to_str(&val.ltime, (char*) buf, 6)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "versioning_asof_timestamp", "NULL (wrong datetime)"); + return (uchar*) thd->strdup("Error: wrong datetime"); + } + return buf; + } + default: + break; + } + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "versioning_asof_timestamp", "NULL (wrong range type)"); + return (uchar*) thd->strdup("Error: wrong range type"); + } + +public: + virtual uchar *session_value_ptr(THD *thd, const LEX_CSTRING *base) + { return value_ptr(thd, session_var(thd, st_vers_asof_timestamp)); } + virtual uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) + { return value_ptr(thd, global_var(st_vers_asof_timestamp)); } +}; diff --git a/sql/table.cc b/sql/table.cc index e8343903d96..72acfc5a9ee 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -67,6 +67,8 @@ LEX_CSTRING GENERAL_LOG_NAME= {STRING_WITH_LEN("general_log")}; /* SLOW_LOG name */ LEX_CSTRING SLOW_LOG_NAME= {STRING_WITH_LEN("slow_log")}; +LEX_CSTRING TRANSACTION_REG_NAME= {STRING_WITH_LEN("transaction_registry")}; + /* Keyword added as a prefix when parsing the defining expression for a virtual column read from the column definition saved in the frm file @@ -259,6 +261,9 @@ TABLE_CATEGORY get_table_category(const LEX_CSTRING *db, if (lex_string_eq(&SLOW_LOG_NAME, name) == 0) return TABLE_CATEGORY_LOG; + + if (lex_string_eq(&TRANSACTION_REG_NAME, name) == 0) + return TABLE_CATEGORY_LOG; } return TABLE_CATEGORY_USER; @@ -415,6 +420,9 @@ void TABLE_SHARE::destroy() DBUG_ENTER("TABLE_SHARE::destroy"); DBUG_PRINT("info", ("db: %s table: %s", db.str, table_name.str)); + if (versioned) + vers_destroy(); + if (ha_share) { delete ha_share; @@ -1186,6 +1194,12 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, uint len; uint ext_key_parts= 0; plugin_ref se_plugin= 0; + const uchar *system_period= 0; + bool vtmd_used= false; + share->vtmd= false; + const uchar *extra2_field_flags= 0; + size_t extra2_field_flags_length= 0; + MEM_ROOT *old_root= thd->mem_root; Virtual_column_info **table_check_constraints; DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image"); @@ -1279,6 +1293,28 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, } #endif /*HAVE_SPATIAL*/ break; + case EXTRA2_PERIOD_FOR_SYSTEM_TIME: + if (system_period || length != 2 * sizeof(uint16)) + goto err; + system_period = extra2; + break; + case EXTRA2_FIELD_FLAGS: + if (extra2_field_flags) + goto err; + extra2_field_flags= extra2; + extra2_field_flags_length= length; + break; + case EXTRA2_VTMD: + if (vtmd_used) + goto err; + share->vtmd= *extra2; + if (share->vtmd) + { + share->table_category= TABLE_CATEGORY_LOG; + share->no_replicate= true; + } + vtmd_used= true; + break; default: /* abort frm parsing if it's an unknown but important extra2 value */ if (type >= EXTRA2_ENGINE_IMPORTANT) @@ -1597,6 +1633,8 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, disk_buff= frm_image + pos + FRM_FORMINFO_SIZE; share->fields= uint2korr(forminfo+258); + if (extra2_field_flags && extra2_field_flags_length != share->fields) + goto err; pos= uint2korr(forminfo+260); /* Length of all screens */ n_length= uint2korr(forminfo+268); interval_count= uint2korr(forminfo+270); @@ -1728,6 +1766,27 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, strpos, vcol_screen_pos); } + /* Set system versioning information. */ + if (system_period == NULL) + { + versioned= false; + row_start_field = 0; + row_end_field = 0; + } + else + { + DBUG_PRINT("info", ("Setting system versioning informations")); + uint16 row_start= uint2korr(system_period); + uint16 row_end= uint2korr(system_period + sizeof(uint16)); + if (row_start >= share->fields || row_end >= share->fields) + goto err; + DBUG_PRINT("info", ("Columns with system versioning: [%d, %d]", row_start, row_end)); + versioned= true; + vers_init(); + row_start_field= row_start; + row_end_field= row_end; + } // if (system_period == NULL) + for (i=0 ; i < share->fields; i++, strpos+=field_pack_length, field_ptr++) { uint pack_flag, interval_nr, unireg_type, recpos, field_length; @@ -1742,6 +1801,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, uint gis_length, gis_decimals, srid= 0; Field::utype unireg_check; const Type_handler *handler; + uint32 flags= 0; if (new_frm_ver >= 3) { @@ -1951,6 +2011,14 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, swap_variables(uint, null_bit_pos, mysql57_vcol_null_bit_pos); } + if (versioned) + { + if (i == row_start_field) + flags|= VERS_SYS_START_FLAG; + else if (i == row_end_field) + flags|= VERS_SYS_END_FLAG; + } + /* Convert pre-10.2.2 timestamps to use Field::default_value */ unireg_check= (Field::utype) MTYP_TYPENR(unireg_type); name.str= fieldnames.type_names[i]; @@ -1962,7 +2030,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, null_pos, null_bit_pos, pack_flag, handler, charset, geom_type, srid, unireg_check, (interval_nr ? share->intervals+interval_nr-1 : NULL), - &name); + &name, flags); if (!reg_field) // Not supported field type goto err; @@ -1978,6 +2046,15 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, reg_field->field_index= i; reg_field->comment=comment; reg_field->vcol_info= vcol_info; + reg_field->flags|= flags; + if (extra2_field_flags) + { + uchar flags= *extra2_field_flags++; + if (flags & VERS_OPTIMIZED_UPDATE) + reg_field->flags|= VERS_UPDATE_UNVERSIONED_FLAG; + if (flags & HIDDEN) + reg_field->flags|= HIDDEN_FLAG; + } if (field_type == MYSQL_TYPE_BIT && !f_bit_as_char(pack_flag)) { null_bits_are_used= 1; @@ -2548,19 +2625,21 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, bitmap_clear_all(share->check_set); } - delete handler_file; #ifndef DBUG_OFF if (use_hash) (void) my_hash_check(&share->name_hash); #endif share->db_plugin= se_plugin; + delete handler_file; + share->error= OPEN_FRM_OK; thd->status_var.opened_shares++; thd->mem_root= old_root; DBUG_RETURN(0); - err: +err: + share->db_plugin= NULL; share->error= OPEN_FRM_CORRUPTED; share->open_errno= my_errno; delete handler_file; @@ -3068,25 +3147,30 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, records=0; if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) records=1; - if (prgflag & (READ_ALL+EXTRA_RECORD)) + if (prgflag & (READ_ALL + EXTRA_RECORD)) + { records++; - - if (!(record= (uchar*) alloc_root(&outparam->mem_root, - share->rec_buff_length * records))) - goto err; /* purecov: inspected */ + if (share->versioned) + records++; + } if (records == 0) { /* We are probably in hard repair, and the buffers should not be used */ - outparam->record[0]= outparam->record[1]= share->default_values; + record= share->default_values; } else { - outparam->record[0]= record; - if (records > 1) - outparam->record[1]= record+ share->rec_buff_length; - else - outparam->record[1]= outparam->record[0]; // Safety + if (!(record= (uchar*) alloc_root(&outparam->mem_root, + share->rec_buff_length * records))) + goto err; /* purecov: inspected */ + } + + for (i= 0; i < 3;) + { + outparam->record[i]= record; + if (++i < records) + record+= share->rec_buff_length; } if (!(field_ptr = (Field **) alloc_root(&outparam->mem_root, @@ -3111,6 +3195,18 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, } (*field_ptr)= 0; // End marker + if (share->versioned) + { + if (outparam->vers_update_user_field()) + goto err; + outparam->vers_write= true; + } + else + { + outparam->vers_user_field= NULL; + outparam->vers_write= false; + } + if (share->found_next_number_field) outparam->found_next_number_field= outparam->field[(uint) (share->found_next_number_field - share->field)]; @@ -3202,6 +3298,7 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, } #ifdef WITH_PARTITION_STORAGE_ENGINE + bool work_part_info_used; if (share->partition_info_str_len && outparam->file) { /* @@ -3222,7 +3319,6 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, thd->set_n_backup_active_arena(&part_func_arena, &backup_arena); thd->stmt_arena= &part_func_arena; bool tmp; - bool work_part_info_used; tmp= mysql_unpack_partition(thd, share->partition_info_str, share->partition_info_str_len, @@ -3382,6 +3478,38 @@ partititon_err: if (share->no_replicate || !binlog_filter->db_ok(share->db.str)) share->can_do_row_logging= 0; // No row based replication +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (outparam->part_info && + outparam->part_info->part_type == VERSIONING_PARTITION) + { + Query_arena *backup_stmt_arena_ptr= thd->stmt_arena; + Query_arena backup_arena; + Query_arena part_func_arena(&outparam->mem_root, + Query_arena::STMT_INITIALIZED); + if (!work_part_info_used) + { + thd->set_n_backup_active_arena(&part_func_arena, &backup_arena); + thd->stmt_arena= &part_func_arena; + } + + bool err= outparam->part_info->vers_setup_stats(thd, is_create_table); + + if (!work_part_info_used) + { + thd->stmt_arena= backup_stmt_arena_ptr; + thd->restore_active_arena(&part_func_arena, &backup_arena); + } + + if (err) + { + outparam->file->ha_close(); + error= OPEN_FRM_OPEN_ERROR; + error_reported= true; + goto err; + } + } +#endif + /* Increment the opened_tables counter, only when open flags set. */ if (db_stat) thd->status_var.opened_tables++; @@ -4629,16 +4757,20 @@ bool TABLE_LIST::create_field_translation(THD *thd) */ if (is_view() && get_unit()->prepared && !field_translation_updated) { + field_translation_updated= TRUE; + if (static_cast<uint>(field_translation_end - field_translation) < + select->item_list.elements) + goto allocate; while ((item= it++)) { field_translation[field_count++].item= item; } - field_translation_updated= TRUE; } DBUG_RETURN(FALSE); } +allocate: arena= thd->activate_stmt_arena_if_needed(&backup); /* Create view fields translation table */ @@ -6275,6 +6407,15 @@ void TABLE::mark_columns_needed_for_delete() if (need_signal) file->column_bitmaps_signal(); + + /* + For System Versioning we have to write and read Sys_end. + */ + if (s->versioned) + { + bitmap_set_bit(read_set, s->vers_end_field()->field_index); + bitmap_set_bit(write_set, s->vers_end_field()->field_index); + } } @@ -6351,6 +6492,15 @@ void TABLE::mark_columns_needed_for_update() need_signal= true; } } + /* + For System Versioning we have to read all columns since we will store + a copy of previous row with modified Sys_end column back to a table. + */ + if (s->versioned) + { + // We will copy old columns to a new row. + use_all_columns(); + } if (check_constraints) { mark_check_constraint_columns_for_read(); @@ -7589,6 +7739,66 @@ int TABLE::update_default_fields(bool update_command, bool ignore_errors) DBUG_RETURN(res); } + +void TABLE::vers_update_fields() +{ + DBUG_ENTER("vers_update_fields"); + + if (versioned_by_sql()) + { + bitmap_set_bit(write_set, vers_start_field()->field_index); + if (vers_start_field()->set_time()) + DBUG_ASSERT(0); + } + + bitmap_set_bit(write_set, vers_end_field()->field_index); + vers_end_field()->set_max(); + + DBUG_VOID_RETURN; +} + + +bool TABLE::vers_update_user_field(MEM_ROOT *_mem_root) +{ + DBUG_ASSERT(versioned()); + Field **dst= (Field **) alloc_root(_mem_root ? _mem_root : &mem_root, + (s->fields - VERSIONING_FIELDS + 1) * + sizeof(Field*)); + if (!dst) + return true; + + vers_user_field= dst; + for (Field **src= field; *src; src++) + { + if ((*src)->vers_sys_field()) + continue; + *dst++= *src; + } + (*dst)= NULL; + return false; +} + + +bool TABLE_LIST::vers_vtmd_name(String& out) const +{ + static const char *vtmd_suffix= "_vtmd"; + static const size_t vtmd_suffix_len= strlen(vtmd_suffix); + if (table_name_length > NAME_CHAR_LEN - vtmd_suffix_len) + { + my_printf_error(ER_VERS_VTMD_ERROR, "Table name is longer than %d characters", MYF(0), int(NAME_CHAR_LEN - vtmd_suffix_len)); + return true; + } + out.set(table_name, table_name_length, table_alias_charset); + if (out.append(vtmd_suffix, vtmd_suffix_len + 1)) + { + my_message(ER_VERS_VTMD_ERROR, "Failed allocate VTMD name", MYF(0)); + return true; + } + out.length(out.length() - 1); + return false; +} + + /** Reset markers that fields are being updated */ @@ -8334,6 +8544,348 @@ LEX_CSTRING *fk_option_name(enum_fk_option opt) return names + opt; } +void TABLE_SHARE::vers_destroy() +{ + mysql_mutex_destroy(&LOCK_rotation); + mysql_cond_destroy(&COND_rotation); + mysql_rwlock_destroy(&LOCK_stat_serial); + if (stat_trx) + { + for (Vers_min_max_stats** p= stat_trx; *p; ++p) + { + delete *p; + } + } +} + +TR_table::TR_table(THD* _thd, bool rw) : + thd(_thd), open_tables_backup(NULL) +{ + init_one_table(LEX_STRING_WITH_LEN(MYSQL_SCHEMA_NAME), + LEX_STRING_WITH_LEN(TRANSACTION_REG_NAME), + TRANSACTION_REG_NAME.str, rw ? TL_WRITE : TL_READ); +} + +bool TR_table::open() +{ + DBUG_ASSERT(!table); + open_tables_backup= new Open_tables_backup; + if (!open_tables_backup) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } + + All_tmp_tables_list *temporary_tables= thd->temporary_tables; + bool error= !open_log_table(thd, this, open_tables_backup); + thd->temporary_tables= temporary_tables; + + return error; +} + +TR_table::~TR_table() +{ + if (table) + { + thd->temporary_tables= NULL; + close_log_table(thd, open_tables_backup); + } + delete open_tables_backup; +} + +void TR_table::store(uint field_id, ulonglong val) +{ + table->field[field_id]->store(val, true); + table->field[field_id]->set_notnull(); +} + +void TR_table::store(uint field_id, timeval ts) +{ + table->field[field_id]->store_timestamp(ts.tv_sec, ts.tv_usec); + table->field[field_id]->set_notnull(); +} + +enum_tx_isolation TR_table::iso_level() const +{ + enum_tx_isolation res= (enum_tx_isolation) ((*this)[FLD_ISO_LEVEL]->val_int() - 1); + DBUG_ASSERT(res <= ISO_SERIALIZABLE); + return res; +} + +bool TR_table::update(ulonglong start_id, ulonglong end_id) +{ + if (!table && open()) + return true; + + timeval start_time= {thd->start_time, long(thd->start_time_sec_part)}; + thd->set_current_time(); + timeval end_time= {thd->start_time, long(thd->start_time_sec_part)}; + store(FLD_TRX_ID, start_id); + store(FLD_COMMIT_ID, end_id); + store(FLD_BEGIN_TS, start_time); + store(FLD_COMMIT_TS, end_time); + store_iso_level(thd->tx_isolation); + + int error= table->file->ha_write_row(table->record[0]); + if (error) + table->file->print_error(error, MYF(0)); + return error; +} + +#define newx new (thd->mem_root) +bool TR_table::query(ulonglong trx_id) +{ + if (!table && open()) + return false; + SQL_SELECT_auto select; + READ_RECORD info; + int error; + List<TABLE_LIST> dummy; + SELECT_LEX &slex= thd->lex->select_lex; + Name_resolution_context_backup backup(slex.context, *this); + Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_TRX_ID]); + Item *value= newx Item_int(thd, trx_id); + COND *conds= newx Item_func_eq(thd, field, value); + if ((error= setup_conds(thd, this, dummy, &conds))) + return false; + select= make_select(table, 0, 0, conds, NULL, 0, &error); + if (error || !select) + return false; + // FIXME: (performance) force index 'transaction_id' + error= init_read_record(&info, thd, table, select, NULL, + 1 /* use_record_cache */, true /* print_error */, + false /* disable_rr_cache */); + while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + { + if (select->skip_record(thd) > 0) + return true; + } + return false; +} + +bool TR_table::query(MYSQL_TIME &commit_time, bool backwards) +{ + if (!table && open()) + return false; + SQL_SELECT_auto select; + READ_RECORD info; + int error; + List<TABLE_LIST> dummy; + SELECT_LEX &slex= thd->lex->select_lex; + Name_resolution_context_backup backup(slex.context, *this); + Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_COMMIT_TS]); + Item *value= newx Item_datetime_literal(thd, &commit_time, 6); + COND *conds; + if (backwards) + conds= newx Item_func_ge(thd, field, value); + else + conds= newx Item_func_le(thd, field, value); + if ((error= setup_conds(thd, this, dummy, &conds))) + return false; + // FIXME: (performance) force index 'commit_timestamp' + select= make_select(table, 0, 0, conds, NULL, 0, &error); + if (error || !select) + return false; + error= init_read_record(&info, thd, table, select, NULL, + 1 /* use_record_cache */, true /* print_error */, + false /* disable_rr_cache */); + + // With PK by transaction_id the records are ordered by PK + bool found= false; + while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + { + if (select->skip_record(thd) > 0) + { + if (backwards) + return true; + found= true; + // TODO: (performance) make ORDER DESC and break after first found. + // Otherwise it is O(n) scan (+copy)! + store_record(table, record[1]); + } + else + { + if (found) + restore_record(table, record[1]); + if (!backwards) + break; + } + } + return found; +} +#undef newx + +bool TR_table::query_sees(bool &result, ulonglong trx_id1, ulonglong trx_id0, + ulonglong commit_id1, enum_tx_isolation iso_level1, + ulonglong commit_id0) +{ + if (trx_id1 == trx_id0) + { + return false; + } + + if (trx_id1 == ULONGLONG_MAX || trx_id0 == 0) + { + result= true; + return false; + } + + if (!commit_id1) + { + if (!query(trx_id1)) + return true; + + commit_id1= (*this)[FLD_COMMIT_ID]->val_int(); + iso_level1= iso_level(); + } + + if (!commit_id0) + { + if (!query(trx_id0)) + return true; + + commit_id0= (*this)[FLD_COMMIT_ID]->val_int(); + } + + // Trivial case: TX1 started after TX0 committed + if (trx_id1 > commit_id0 + // Concurrent transactions: TX1 committed after TX0 and TX1 is read (un)committed + || (commit_id1 > commit_id0 && iso_level1 < ISO_REPEATABLE_READ)) + { + result= true; + } + else // All other cases: TX1 does not see TX0 + { + result= false; + } + + return false; +} + +void TR_table::warn_schema_incorrect(const char *reason) +{ + if (MYSQL_VERSION_ID == table->s->mysql_version) + { + sql_print_error("`%s.%s` schema is incorrect: %s.", db, table_name, reason); + } + else + { + sql_print_error("`%s.%s` schema is incorrect: %s. Created with MariaDB %d, " + "now running %d.", db, table_name, reason, MYSQL_VERSION_ID, + static_cast<int>(table->s->mysql_version)); + } +} + +bool TR_table::check() +{ + if (!ha_resolve_by_legacy_type(thd, DB_TYPE_INNODB)) + { + sql_print_information("`%s.%s` requires InnoDB storage engine.", db, table_name); + return true; + } + + if (open()) + { + sql_print_warning("`%s.%s` does not exist (open failed).", db, table_name); + return true; + } + + if (table->file->ht->db_type != DB_TYPE_INNODB) + { + warn_schema_incorrect("Wrong table engine (expected InnoDB)"); + return true; + } + +#define WARN_SCHEMA(...) \ + char reason[128]; \ + snprintf(reason, 128, __VA_ARGS__); \ + warn_schema_incorrect(reason); + + if (table->s->fields != FIELD_COUNT) + { + WARN_SCHEMA("Wrong field count (expected %d)", FIELD_COUNT); + return true; + } + + if (table->field[FLD_TRX_ID]->type() != MYSQL_TYPE_LONGLONG) + { + WARN_SCHEMA("Wrong field %d type (expected BIGINT UNSIGNED)", FLD_TRX_ID); + return true; + } + + if (table->field[FLD_COMMIT_ID]->type() != MYSQL_TYPE_LONGLONG) + { + WARN_SCHEMA("Wrong field %d type (expected BIGINT UNSIGNED)", FLD_COMMIT_ID); + return true; + } + + if (table->field[FLD_BEGIN_TS]->type() != MYSQL_TYPE_TIMESTAMP) + { + WARN_SCHEMA("Wrong field %d type (expected TIMESTAMP(6))", FLD_BEGIN_TS); + return true; + } + + if (table->field[FLD_COMMIT_TS]->type() != MYSQL_TYPE_TIMESTAMP) + { + WARN_SCHEMA("Wrong field %d type (expected TIMESTAMP(6))", FLD_COMMIT_TS); + return true; + } + + if (table->field[FLD_ISO_LEVEL]->type() != MYSQL_TYPE_STRING || + !(table->field[FLD_ISO_LEVEL]->flags & ENUM_FLAG)) + { + wrong_enum: + WARN_SCHEMA("Wrong field %d type (expected ENUM('READ-UNCOMMITTED', " + "'READ-COMMITTED', 'REPEATABLE-READ', 'SERIALIZABLE'))", + FLD_ISO_LEVEL); + return true; + } + + Field_enum *iso_level= static_cast<Field_enum *>(table->field[FLD_ISO_LEVEL]); + st_typelib *typelib= iso_level->typelib; + + if (typelib->count != 4) + goto wrong_enum; + + if (strcmp(typelib->type_names[0], "READ-UNCOMMITTED") || + strcmp(typelib->type_names[1], "READ-COMMITTED") || + strcmp(typelib->type_names[2], "REPEATABLE-READ") || + strcmp(typelib->type_names[3], "SERIALIZABLE")) + { + goto wrong_enum; + } + + if (!table->key_info || !table->key_info->key_part) + goto wrong_pk; + + if (strcmp(table->key_info->key_part->field->field_name.str, "transaction_id")) + { + wrong_pk: + WARN_SCHEMA("Wrong PRIMARY KEY (expected `transaction_id`)"); + return true; + } + + return false; +} + +void vers_select_conds_t::resolve_units(bool timestamps_only) +{ + DBUG_ASSERT(type != FOR_SYSTEM_TIME_UNSPECIFIED); + DBUG_ASSERT(start); + if (unit_start == UNIT_AUTO) + { + unit_start= (!timestamps_only && (start->result_type() == INT_RESULT || + start->result_type() == REAL_RESULT)) ? + UNIT_TRX_ID : UNIT_TIMESTAMP; + } + if (end && unit_end == UNIT_AUTO) + { + unit_end= (!timestamps_only && (end->result_type() == INT_RESULT || + end->result_type() == REAL_RESULT)) ? + UNIT_TRX_ID : UNIT_TIMESTAMP; + } +} + Field *TABLE::find_field_by_name(LEX_CSTRING *str) const { diff --git a/sql/table.h b/sql/table.h index aded1930a6b..1dad990edab 100644 --- a/sql/table.h +++ b/sql/table.h @@ -561,6 +561,11 @@ struct TABLE_STATISTICS_CB bool histograms_are_read; }; +class Vers_min_max_stats; + +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif /** This structure is shared between different table objects. There is one @@ -737,6 +742,52 @@ struct TABLE_SHARE #endif /** + System versioning support. + */ + + bool versioned; + bool vtmd; + uint16 row_start_field; + uint16 row_end_field; + uint32 hist_part_id; + Vers_min_max_stats** stat_trx; + ulonglong stat_serial; // guards check_range_constants() updates + + bool busy_rotation; + mysql_mutex_t LOCK_rotation; + mysql_cond_t COND_rotation; + mysql_rwlock_t LOCK_stat_serial; + + void vers_init() + { + hist_part_id= UINT32_MAX; + busy_rotation= false; + stat_trx= NULL; + stat_serial= 0; + mysql_mutex_init(key_TABLE_SHARE_LOCK_rotation, &LOCK_rotation, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_TABLE_SHARE_COND_rotation, &COND_rotation, NULL); + mysql_rwlock_init(key_rwlock_LOCK_stat_serial, &LOCK_stat_serial); + } + + void vers_destroy(); + + Field *vers_start_field() + { + return field[row_start_field]; + } + + Field *vers_end_field() + { + return field[row_end_field]; + } + + void vers_wait_rotation() + { + while (busy_rotation) + mysql_cond_wait(&COND_rotation, &LOCK_rotation); + } + + /** Cache the checked structure of this table. The pointer data is used to describe the structure that @@ -1046,7 +1097,7 @@ public: uint32 instance; /** Table cache instance this TABLE is belonging to */ THD *in_use; /* Which thread uses this */ - uchar *record[2]; /* Pointer to records */ + uchar *record[3]; /* Pointer to records */ uchar *write_row_record; /* Used as optimisation in THD::write_row */ uchar *insert_values; /* used by INSERT ... UPDATE */ @@ -1079,6 +1130,7 @@ public: Field **default_field; /* Fields with non-constant DEFAULT */ Field *next_number_field; /* Set if next_number is activated */ Field *found_next_number_field; /* Set on open */ + Field **vers_user_field; /* Non-system fields */ Virtual_column_info **check_constraints; /* Table's triggers, 0 if there are no of them */ @@ -1428,6 +1480,7 @@ public: int update_virtual_field(Field *vf); int update_virtual_fields(handler *h, enum_vcol_update_mode update_mode); int update_default_fields(bool update, bool ignore_errors); + void vers_update_fields(); void reset_default_fields(); inline ha_rows stat_records() { return used_stat_records; } @@ -1435,7 +1488,9 @@ public: bool prepare_triggers_for_delete_stmt_or_event(); bool prepare_triggers_for_update_stmt_or_event(); - inline Field **field_to_fill(); + Field **field_to_fill(); + Field **vers_user_field_to_fill(); + bool vers_update_user_field(MEM_ROOT *mem_root= NULL); bool validate_default_values_of_unset_fields(THD *thd) const; bool insert_all_rows_into_tmp_table(THD *thd, @@ -1444,6 +1499,69 @@ public: bool with_cleanup); Field *find_field_by_name(LEX_CSTRING *str) const; bool export_structure(THD *thd, class Row_definition_list *defs); + + /** + System Versioning support + */ + bool vers_write; + + bool versioned() const + { + DBUG_ASSERT(s); + return s->versioned; + } + + bool versioned_write() const + { + DBUG_ASSERT(versioned() || !vers_write); + return vers_write; + } + + /* Versioned by SQL layer */ + bool versioned_by_sql() const + { + DBUG_ASSERT(s && file); + return s->versioned && !file->native_versioned(); + } + + bool versioned_by_engine() const + { + DBUG_ASSERT(s && file); + return s->versioned && file->native_versioned(); + } + + bool vers_vtmd() const + { + DBUG_ASSERT(s); + return s->versioned && s->vtmd; + } + + Field *vers_start_field() const + { + DBUG_ASSERT(s && s->versioned); + return field[s->row_start_field]; + } + + Field *vers_end_field() const + { + DBUG_ASSERT(s && s->versioned); + return field[s->row_end_field]; + } + + ulonglong vers_start_id() const; + ulonglong vers_end_id() const; + + int delete_row(); + +/** Number of additional fields used in versioned tables */ +#define VERSIONING_FIELDS 2 + + uint vers_user_fields() const + { + return s->versioned ? + s->fields - VERSIONING_FIELDS : + s->fields; + } }; @@ -1731,6 +1849,52 @@ class Item_in_subselect; 4) jtbm semi-join (jtbm_subselect != NULL) */ +enum vers_range_unit_t +{ + UNIT_AUTO = 0, + UNIT_TIMESTAMP, + UNIT_TRX_ID +}; + +/** last_leaf_for_name_resolutioning support. */ +struct vers_select_conds_t +{ + vers_range_type_t type; + vers_range_unit_t unit_start, unit_end; + bool import_outer:1; + bool from_inner:1; + Item *start, *end; + + void empty() + { + type= FOR_SYSTEM_TIME_UNSPECIFIED; + unit_start= unit_end= UNIT_AUTO; + import_outer= from_inner= false; + start= end= NULL; + } + + inline Item *fix_dec(Item *item); + + inline void init( vers_range_type_t t, vers_range_unit_t u_start, + Item * s, vers_range_unit_t u_end, Item * e); + + bool init_from_sysvar(THD *thd); + + bool operator== (vers_range_type_t b) + { + return type == b; + } + bool operator!= (vers_range_type_t b) + { + return type != b; + } + operator bool() const + { + return type != FOR_SYSTEM_TIME_UNSPECIFIED; + } + void resolve_units(bool timestamps_only); +}; + struct LEX; class Index_hint; struct TABLE_LIST @@ -1796,6 +1960,7 @@ struct TABLE_LIST const char *db, *alias, *table_name, *schema_table_name; const char *option; /* Used by cache index */ Item *on_expr; /* Used with outer join */ + Item *saved_on_expr; /* Used with SP and System Versioning */ Item *sj_on_expr; /* @@ -2188,6 +2353,12 @@ struct TABLE_LIST TABLE_LIST *find_underlying_table(TABLE *table); TABLE_LIST *first_leaf_for_name_resolution(); TABLE_LIST *last_leaf_for_name_resolution(); + + /* System Versioning */ + vers_select_conds_t vers_conditions; + bool vers_vtmd_name(String &out) const; + bool vers_force_alias; + /** @brief Find the bottom in the chain of embedded table VIEWs. @@ -2721,6 +2892,7 @@ extern LEX_CSTRING PERFORMANCE_SCHEMA_DB_NAME; extern LEX_CSTRING GENERAL_LOG_NAME; extern LEX_CSTRING SLOW_LOG_NAME; +extern LEX_CSTRING TRANSACTION_REG_NAME; /* information schema */ extern LEX_CSTRING INFORMATION_SCHEMA_NAME; @@ -2749,6 +2921,147 @@ inline void mark_as_null_row(TABLE *table) bool is_simple_order(ORDER *order); +class Open_tables_backup; + +/** Transaction Registry Table (TRT) + + This table holds transaction IDs, their corresponding times and other + transaction-related data which is used for transaction order resolution. + When versioned table marks its records lifetime with transaction IDs, + TRT is used to get their actual timestamps. */ +class TR_table: public TABLE_LIST +{ + THD *thd; + Open_tables_backup *open_tables_backup; + +public: + enum field_id_t { + FLD_TRX_ID= 0, + FLD_COMMIT_ID, + FLD_BEGIN_TS, + FLD_COMMIT_TS, + FLD_ISO_LEVEL, + FIELD_COUNT + }; + /** + @param[in,out] Thread handle + @param[in] Current transaction is read-write. + */ + TR_table(THD *_thd, bool rw= false); + /** + Opens a transaction_registry table. + + @retval true on error, false otherwise. + */ + bool open(); + ~TR_table(); + /** + @retval current thd + */ + THD *get_thd() const { return thd; } + /** + Stores value to internal transaction_registry TABLE object. + + @param[in] field number in a TABLE + @param[in] value to store + */ + void store(uint field_id, ulonglong val); + /** + Stores value to internal transaction_registry TABLE object. + + @param[in] field number in a TABLE + @param[in] value to store + */ + void store(uint field_id, timeval ts); + /** + Update the transaction_registry right before commit. + @param start_id transaction identifier at start + @param end_id transaction identifier at commit + + @retval false on success + @retval true on error (the transaction must be rolled back) + */ + bool update(ulonglong start_id, ulonglong end_id); + // return true if found; false if not found or error + bool query(ulonglong trx_id); + /** + Gets a row from transaction_registry with the closest commit_timestamp to + first argument. We can search for a value which a lesser or greater than + first argument. Also loads a row into an internal TABLE object. + + @param[in] timestamp + @param[in] true if we search for a lesser timestamp, false if greater + @retval true if exists, false it not exists or an error occured + */ + bool query(MYSQL_TIME &commit_time, bool backwards); + /** + Checks whether transaction1 sees transaction0. + + @param[out] true if transaction1 sees transaction0, undefined on error and + when transaction1=transaction0 and false otherwise + @param[in] transaction_id of transaction1 + @param[in] transaction_id of transaction0 + @param[in] commit time of transaction1 or 0 if we want it to be queried + @param[in] isolation level (from handler.h) of transaction1 + @param[in] commit time of transaction0 or 0 if we want it to be queried + @retval true on error, false otherwise + */ + bool query_sees(bool &result, ulonglong trx_id1, ulonglong trx_id0, + ulonglong commit_id1= 0, + enum_tx_isolation iso_level1= ISO_READ_UNCOMMITTED, + ulonglong commit_id0= 0); + + /** + @retval transaction isolation level of a row from internal TABLE object. + */ + enum_tx_isolation iso_level() const; + /** + Stores transactioin isolation level to internal TABLE object. + */ + void store_iso_level(enum_tx_isolation iso_level) + { + DBUG_ASSERT(iso_level <= ISO_SERIALIZABLE); + store(FLD_ISO_LEVEL, iso_level + 1); + } + + /** + Writes a message to MariaDB log about incorrect transaction_registry schema. + + @param[in] a message explained what's incorrect in schema + */ + void warn_schema_incorrect(const char *reason); + /** + Checks whether transaction_registry table has a correct schema. + + @retval true if schema is incorrect and false otherwise + */ + bool check(); + + TABLE * operator-> () const + { + return table; + } + Field * operator[] (uint field_id) const + { + DBUG_ASSERT(field_id < FIELD_COUNT); + return table->field[field_id]; + } + operator bool () const + { + return table; + } + bool operator== (const TABLE_LIST &subj) const + { + if (0 != strcmp(db, subj.db)) + return false; + return (0 == strcmp(table_name, subj.table_name)); + } + bool operator!= (const TABLE_LIST &subj) const + { + return !(*this == subj); + } +}; + #endif /* MYSQL_CLIENT */ #endif /* TABLE_INCLUDED */ diff --git a/sql/tztime.h b/sql/tztime.h index eb7d85c48b2..7ffc36011e1 100644 --- a/sql/tztime.h +++ b/sql/tztime.h @@ -89,6 +89,5 @@ extern my_time_t sec_since_epoch_TIME(MYSQL_TIME *t); static const int MY_TZ_TABLES_COUNT= 4; - #endif /* !defined(TESTTIME) && !defined(TZINFO2SQL) */ #endif /* TZTIME_INCLUDED */ diff --git a/sql/unireg.cc b/sql/unireg.cc index 1ca0233552e..5da14e31243 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -87,6 +87,46 @@ static uchar *extra2_write(uchar *pos, enum extra2_frm_value_type type, return extra2_write(pos, type, reinterpret_cast<LEX_CSTRING *>(str)); } +static const bool ROW_START = true; +static const bool ROW_END = false; + +inline +uint16 +vers_get_field(HA_CREATE_INFO *create_info, List<Create_field> &create_fields, bool row_start) +{ + DBUG_ASSERT(create_info->versioned()); + + List_iterator<Create_field> it(create_fields); + Create_field *sql_field = NULL; + + const LString_i row_field= row_start ? create_info->vers_info.as_row.start + : create_info->vers_info.as_row.end; + DBUG_ASSERT(row_field); + + for (unsigned field_no = 0; (sql_field = it++); ++field_no) + { + if (row_field == sql_field->field_name) + { + DBUG_ASSERT(field_no <= uint16(~0U)); + return uint16(field_no); + } + } + + DBUG_ASSERT(0); /* Not Reachable */ + return 0; +} + +bool has_extra2_field_flags(List<Create_field> &create_fields) +{ + List_iterator<Create_field> it(create_fields); + while (Create_field *f= it++) + { + if (f->flags & (VERS_UPDATE_UNVERSIONED_FLAG | HIDDEN_FLAG)) + return true; + } + return false; +} + /** Create a frm (table definition) file @@ -219,6 +259,22 @@ LEX_CUSTRING build_frm_image(THD *thd, const char *table, if (gis_extra2_len) extra2_size+= 1 + (gis_extra2_len > 255 ? 3 : 1) + gis_extra2_len; + if (create_info->versioned()) + { + extra2_size+= 1 + 1 + 2 * sizeof(uint16); + } + + if (create_info->vtmd()) + { + extra2_size+= 1 + 1 + 1; + } + + bool has_extra2_field_flags_= has_extra2_field_flags(create_fields); + if (has_extra2_field_flags_) + { + extra2_size+= + 1 + (create_fields.elements <= 255 ? 1 : 3) + create_fields.elements; + } key_buff_length= uint4korr(fileinfo+47); @@ -275,6 +331,39 @@ LEX_CUSTRING build_frm_image(THD *thd, const char *table, } #endif /*HAVE_SPATIAL*/ + if (create_info->versioned()) + { + *pos++= EXTRA2_PERIOD_FOR_SYSTEM_TIME; + *pos++= 2 * sizeof(uint16); + int2store(pos, vers_get_field(create_info, create_fields, ROW_START)); + pos+= sizeof(uint16); + int2store(pos, vers_get_field(create_info, create_fields, ROW_END)); + pos+= sizeof(uint16); + } + + if (create_info->vtmd()) + { + *pos++= EXTRA2_VTMD; + *pos++= 1; + *pos++= 1; + } + + if (has_extra2_field_flags_) + { + *pos++= EXTRA2_FIELD_FLAGS; + pos= extra2_write_len(pos, create_fields.elements); + List_iterator<Create_field> it(create_fields); + while (Create_field *field= it++) + { + uchar flags= 0; + if (field->flags & VERS_UPDATE_UNVERSIONED_FLAG) + flags|= VERS_OPTIMIZED_UPDATE; + if (field->flags & HIDDEN_FLAG) + flags|= HIDDEN; + *pos++= flags; + } + } + int4store(pos, filepos); // end of the extra2 segment pos+= 4; @@ -960,7 +1049,8 @@ static bool make_empty_rec(THD *thd, uchar *buff, uint table_options, field->unireg_check, field->save_interval ? field->save_interval : field->interval, - &field->field_name); + &field->field_name, + field->flags); if (!regfield) { error= 1; diff --git a/sql/unireg.h b/sql/unireg.h index b0cfb3841ef..25cb22207ad 100644 --- a/sql/unireg.h +++ b/sql/unireg.h @@ -172,12 +172,20 @@ enum extra2_frm_value_type { EXTRA2_TABLEDEF_VERSION=0, EXTRA2_DEFAULT_PART_ENGINE=1, EXTRA2_GIS=2, + EXTRA2_PERIOD_FOR_SYSTEM_TIME=4, + EXTRA2_FIELD_FLAGS=8, + EXTRA2_VTMD=16, #define EXTRA2_ENGINE_IMPORTANT 128 EXTRA2_ENGINE_TABLEOPTS=128, }; +enum extra2_field_flags { + VERS_OPTIMIZED_UPDATE=1, + HIDDEN=2, +}; + int rea_create_table(THD *thd, LEX_CUSTRING *frm, const char *path, const char *db, const char *table_name, HA_CREATE_INFO *create_info, handler *file, diff --git a/sql/vers_string.h b/sql/vers_string.h new file mode 100644 index 00000000000..6d501e1b81c --- /dev/null +++ b/sql/vers_string.h @@ -0,0 +1,133 @@ +#ifndef VERS_STRING_INCLUDED +#define VERS_STRING_INCLUDED + +struct Compare_strncmp +{ + int operator()(const LEX_CSTRING& a, const LEX_CSTRING& b) const + { + return strncmp(a.str, b.str, a.length); + } + static CHARSET_INFO* charset() + { + return system_charset_info; + } +}; + +template <CHARSET_INFO* &CS= system_charset_info> +struct Compare_my_strcasecmp +{ + int operator()(const LEX_CSTRING& a, const LEX_CSTRING& b) const + { + DBUG_ASSERT(a.str[a.length] == 0 && b.str[b.length] == 0); + return my_strcasecmp(CS, a.str, b.str); + } + static CHARSET_INFO* charset() + { + return CS; + } +}; + +typedef Compare_my_strcasecmp<files_charset_info> Compare_fs; +typedef Compare_my_strcasecmp<table_alias_charset> Compare_t; + +template <class Storage= LEX_CSTRING> +struct LEX_STRING_u : public Storage +{ + LEX_STRING_u() + { + Storage::str= NULL; + Storage::length= 0; + } + LEX_STRING_u(const char *_str, uint32 _len, CHARSET_INFO *) + { + Storage::str= _str; + Storage::length= _len; + } + uint32 length() const + { + return Storage::length; + } + const char *ptr() const + { + return Storage::str; + } + void set(const char *_str, uint32 _len, CHARSET_INFO *) + { + Storage::str= _str; + Storage::length= _len; + } + const LEX_CSTRING& lex_cstring() const + { + return *this; + } + const LEX_STRING& lex_string() const + { + return *(LEX_STRING *)this; + } +}; + +template <class Compare= Compare_strncmp, class Storage= LEX_STRING_u<> > +struct XString : public Storage +{ +public: + XString() {} + XString(const char *_str, size_t _len) : + Storage(_str, _len, Compare::charset()) + { + } + XString(const LEX_STRING src) : + Storage(src.str, src.length, Compare::charset()) + { + } + XString(const LEX_CSTRING src) : + Storage(src.str, src.length, Compare::charset()) + { + } + XString(const char *_str) : + Storage(_str, strlen(_str), Compare::charset()) + { + } + bool operator== (const XString& b) const + { + return Storage::length() == b.length() && 0 == Compare()(this->lex_cstring(), b.lex_cstring()); + } + bool operator!= (const XString& b) const + { + return !(*this == b); + } + operator const char* () const + { + return Storage::ptr(); + } + operator LEX_CSTRING& () const + { + return this->lex_cstring(); + } + operator LEX_STRING () const + { + LEX_STRING res; + res.str= const_cast<char *>(this->ptr()); + res.length= this->length(); + return res; + } + operator bool () const + { + return Storage::ptr() != NULL; + } +}; + +typedef XString<> LString; +typedef XString<Compare_fs> LString_fs; +typedef XString<Compare_my_strcasecmp<> > LString_i; + +typedef XString<Compare_strncmp, String> SString; +typedef XString<Compare_fs, String> SString_fs; +typedef XString<Compare_t, String> SString_t; + + +#define XSTRING_WITH_LEN(X) (X).ptr(), (X).length() +#define DB_WITH_LEN(X) (X).db, (X).db_length +#define TABLE_NAME_WITH_LEN(X) (X).table_name, (X).table_name_length + + +#endif // VERS_STRING_INCLUDED diff --git a/sql/vers_utils.h b/sql/vers_utils.h new file mode 100644 index 00000000000..948139bfa9b --- /dev/null +++ b/sql/vers_utils.h @@ -0,0 +1,68 @@ +#ifndef VERS_UTILS_INCLUDED +#define VERS_UTILS_INCLUDED + +#include "table.h" +#include "sql_class.h" +#include "vers_string.h" + +class MDL_auto_lock +{ + THD *thd; + TABLE_LIST &table; + bool error; + +public: + MDL_auto_lock(THD *_thd, TABLE_LIST &_table) : + thd(_thd), table(_table) + { + DBUG_ASSERT(thd); + table.mdl_request.init(MDL_key::TABLE, table.db, table.table_name, MDL_EXCLUSIVE, MDL_EXPLICIT); + error= thd->mdl_context.acquire_lock(&table.mdl_request, thd->variables.lock_wait_timeout); + } + ~MDL_auto_lock() + { + if (!error) + { + DBUG_ASSERT(table.mdl_request.ticket); + thd->mdl_context.release_lock(table.mdl_request.ticket); + table.mdl_request.ticket= NULL; + } + } + bool acquire_error() const { return error; } +}; + + +class Local_da : public Diagnostics_area +{ + THD *thd; + uint sql_error; + Diagnostics_area *saved_da; + +public: + Local_da(THD *_thd, uint _sql_error= 0) : + Diagnostics_area(_thd->query_id, false, true), + thd(_thd), + sql_error(_sql_error), + saved_da(_thd->get_stmt_da()) + { + thd->set_stmt_da(this); + } + ~Local_da() + { + if (saved_da) + finish(); + } + void finish() + { + DBUG_ASSERT(saved_da && thd); + thd->set_stmt_da(saved_da); + if (is_error()) + my_error(sql_error ? sql_error : sql_errno(), MYF(0), message()); + if (warn_count() > error_count()) + saved_da->copy_non_errors_from_wi(thd, get_warning_info()); + saved_da= NULL; + } +}; + + +#endif // VERS_UTILS_INCLUDED diff --git a/sql/vtmd.cc b/sql/vtmd.cc new file mode 100644 index 00000000000..ebab4bc4c92 --- /dev/null +++ b/sql/vtmd.cc @@ -0,0 +1,695 @@ +#include "vtmd.h" +#include "sql_base.h" +#include "sql_class.h" +#include "sql_handler.h" // mysql_ha_rm_tables() +#include "sql_table.h" +#include "sql_select.h" +#include "table_cache.h" // tdc_remove_table() +#include "key.h" +#include "sql_show.h" +#include "sql_parse.h" +#include "sql_lex.h" +#include "sp_head.h" +#include "sp_rcontext.h" + +LString VERS_VTMD_TEMPLATE(C_STRING_WITH_LEN("vtmd_template")); + +bool +VTMD_table::create(THD *thd) +{ + Table_specification_st create_info; + TABLE_LIST src_table, table; + create_info.init(DDL_options_st::OPT_LIKE); + create_info.options|= HA_VTMD; + create_info.alias= vtmd_name; + table.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_name), + vtmd_name, + TL_READ); + src_table.init_one_table( + LEX_STRING_WITH_LEN(MYSQL_SCHEMA_NAME), + XSTRING_WITH_LEN(VERS_VTMD_TEMPLATE), + VERS_VTMD_TEMPLATE, + TL_READ); + + Query_tables_backup backup(thd); + thd->lex->add_to_query_tables(&src_table); + + MDL_auto_lock mdl_lock(thd, table); + if (mdl_lock.acquire_error()) + return true; + + Reprepare_observer *reprepare_observer= thd->m_reprepare_observer; + partition_info *work_part_info= thd->work_part_info; + thd->m_reprepare_observer= NULL; + thd->work_part_info= NULL; + bool rc= mysql_create_like_table(thd, &table, &src_table, &create_info); + thd->m_reprepare_observer= reprepare_observer; + thd->work_part_info= work_part_info; + return rc; +} + +bool +VTMD_table::find_record(ulonglong sys_trx_end, bool &found) +{ + int error; + key_buf_t key; + found= false; + + DBUG_ASSERT(vtmd.table); + + if (key.allocate(vtmd.table->s->max_unique_length)) + return true; + + DBUG_ASSERT(sys_trx_end); + vtmd.table->vers_end_field()->set_notnull(); + vtmd.table->vers_end_field()->store(sys_trx_end, true); + key_copy(key, vtmd.table->record[0], vtmd.table->key_info + IDX_TRX_END, 0); + + error= vtmd.table->file->ha_index_read_idx_map(vtmd.table->record[1], IDX_TRX_END, + key, + HA_WHOLE_KEY, + HA_READ_KEY_EXACT); + if (error) + { + if (error == HA_ERR_RECORD_DELETED || error == HA_ERR_KEY_NOT_FOUND) + return false; + vtmd.table->file->print_error(error, MYF(0)); + return true; + } + + restore_record(vtmd.table, record[1]); + + found= true; + return false; +} + + +bool +VTMD_table::open(THD *thd, Local_da &local_da, bool *created) +{ + if (created) + *created= false; + + if (0 == vtmd_name.length() && about.vers_vtmd_name(vtmd_name)) + return true; + + while (true) // max 2 iterations + { + vtmd.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_name), + vtmd_name, + TL_WRITE_CONCURRENT_INSERT); + + TABLE *res= open_log_table(thd, &vtmd, &open_tables_backup); + if (res) + return false; + + if (created && !*created && local_da.is_error() && local_da.sql_errno() == ER_NO_SUCH_TABLE) + { + local_da.reset_diagnostics_area(); + if (create(thd)) + break; + *created= true; + } + else + break; + } + return true; +} + +bool +VTMD_table::update(THD *thd, const char* archive_name) +{ + bool result= true; + bool found= false; + bool created; + int error; + size_t an_len= 0; + ulonglong save_thd_options; + { + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + + save_thd_options= thd->variables.option_bits; + thd->variables.option_bits&= ~OPTION_BIN_LOG; + + if (open(thd, local_da, &created)) + goto open_error; + + if (!vtmd.table->versioned()) + { + my_message(ER_VERS_VTMD_ERROR, "VTMD is not versioned", MYF(0)); + goto quit; + } + + if (!created && find_record(ULONGLONG_MAX, found)) + goto quit; + + if ((error= vtmd.table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE))) + { + vtmd.table->file->print_error(error, MYF(0)); + goto quit; + } + + /* Honor next number columns if present */ + vtmd.table->next_number_field= vtmd.table->found_next_number_field; + + if (vtmd.table->s->fields != FIELD_COUNT) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` unexpected fields count: %d", MYF(0), + vtmd.table->s->db.str, vtmd.table->s->table_name.str, vtmd.table->s->fields); + goto quit; + } + + if (archive_name) + { + an_len= strlen(archive_name); + vtmd.table->field[FLD_ARCHIVE_NAME]->store(archive_name, an_len, table_alias_charset); + vtmd.table->field[FLD_ARCHIVE_NAME]->set_notnull(); + } + else + { + vtmd.table->field[FLD_ARCHIVE_NAME]->set_null(); + } + vtmd.table->field[FLD_COL_RENAMES]->set_null(); + + if (found) + { + if (thd->lex->sql_command == SQLCOM_CREATE_TABLE) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` exists and not empty!", MYF(0), + vtmd.table->s->db.str, vtmd.table->s->table_name.str); + goto quit; + } + vtmd.table->mark_columns_needed_for_update(); // not needed? + if (archive_name) + { + vtmd.table->vers_write= false; + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + vtmd.table->vers_write= true; + + if (!error) + { + if (thd->lex->sql_command == SQLCOM_DROP_TABLE) + { + error= vtmd.table->file->ha_delete_row(vtmd.table->record[0]); + } + else + { + DBUG_ASSERT(thd->lex->sql_command == SQLCOM_ALTER_TABLE); + ulonglong sys_trx_end= (ulonglong) vtmd.table->vers_start_field()->val_int(); + store_record(vtmd.table, record[1]); + vtmd.table->field[FLD_NAME]->store(TABLE_NAME_WITH_LEN(about), system_charset_info); + vtmd.table->field[FLD_NAME]->set_notnull(); + vtmd.table->field[FLD_ARCHIVE_NAME]->set_null(); + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + if (error) + goto err; + + DBUG_ASSERT(an_len); + while (true) + { // fill archive_name of last sequential renames + bool found; + if (find_record(sys_trx_end, found)) + goto quit; + if (!found || !vtmd.table->field[FLD_ARCHIVE_NAME]->is_null()) + break; + + store_record(vtmd.table, record[1]); + vtmd.table->field[FLD_ARCHIVE_NAME]->store(archive_name, an_len, table_alias_charset); + vtmd.table->field[FLD_ARCHIVE_NAME]->set_notnull(); + vtmd.table->vers_write= false; + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + vtmd.table->vers_write= true; + if (error) + goto err; + sys_trx_end= (ulonglong) vtmd.table->vers_start_field()->val_int(); + } // while (true) + } // else (thd->lex->sql_command != SQLCOM_DROP_TABLE) + } // if (!error) + } // if (archive_name) + else + { + vtmd.table->field[FLD_NAME]->store(TABLE_NAME_WITH_LEN(about), system_charset_info); + vtmd.table->field[FLD_NAME]->set_notnull(); + error= vtmd.table->file->ha_update_row(vtmd.table->record[1], vtmd.table->record[0]); + } + } // if (found) + else + { + vtmd.table->field[FLD_NAME]->store(TABLE_NAME_WITH_LEN(about), system_charset_info); + vtmd.table->field[FLD_NAME]->set_notnull(); + vtmd.table->mark_columns_needed_for_insert(); // not needed? + error= vtmd.table->file->ha_write_row(vtmd.table->record[0]); + } + + if (error) + { +err: + vtmd.table->file->print_error(error, MYF(0)); + } + else + result= local_da.is_error(); + } + +quit: + if (!result && vtmd.table->file->ht->prepare_commit_versioned) + { + DBUG_ASSERT(use_transaction_registry); // FIXME: disable survival mode while TRT is disabled + TR_table trt(thd, true); + ulonglong trx_start_id= 0; + ulonglong trx_end_id= vtmd.table->file->ht->prepare_commit_versioned(thd, &trx_start_id); + result= trx_end_id && trt.update(trx_start_id, trx_end_id); + } + + close_log_table(thd, &open_tables_backup); + +open_error: + thd->variables.option_bits= save_thd_options; + return result; +} + +bool +VTMD_rename::move_archives(THD *thd, LString &new_db) +{ + vtmd.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_name), + vtmd_name, + TL_READ); + int error; + bool rc= false; + SString_fs archive; + bool end_keyread= false; + bool index_end= false; + Open_tables_backup open_tables_backup; + key_buf_t key; + + TABLE *res= open_log_table(thd, &vtmd, &open_tables_backup); + if (!res) + return true; + + if (key.allocate(vtmd.table->key_info[IDX_ARCHIVE_NAME].key_length)) + { + close_log_table(thd, &open_tables_backup); + return true; + } + + if ((error= vtmd.table->file->ha_start_keyread(IDX_ARCHIVE_NAME))) + goto err; + end_keyread= true; + + if ((error= vtmd.table->file->ha_index_init(IDX_ARCHIVE_NAME, true))) + goto err; + index_end= true; + + error= vtmd.table->file->ha_index_first(vtmd.table->record[0]); + while (!error) + { + if (!vtmd.table->field[FLD_ARCHIVE_NAME]->is_null()) + { + vtmd.table->field[FLD_ARCHIVE_NAME]->val_str(&archive); + key_copy(key, + vtmd.table->record[0], + &vtmd.table->key_info[IDX_ARCHIVE_NAME], + vtmd.table->key_info[IDX_ARCHIVE_NAME].key_length, + false); + error= vtmd.table->file->ha_index_read_map( + vtmd.table->record[0], + key, + vtmd.table->key_info[IDX_ARCHIVE_NAME].ext_key_part_map, + HA_READ_PREFIX_LAST); + if (!error) + { + if ((rc= move_table(thd, archive, new_db))) + break; + + error= vtmd.table->file->ha_index_next(vtmd.table->record[0]); + } + } + else + { + archive.length(0); + error= vtmd.table->file->ha_index_next(vtmd.table->record[0]); + } + } + + if (error && error != HA_ERR_END_OF_FILE) + { +err: + vtmd.table->file->print_error(error, MYF(0)); + rc= true; + } + + if (index_end) + vtmd.table->file->ha_index_end(); + if (end_keyread) + vtmd.table->file->ha_end_keyread(); + + close_log_table(thd, &open_tables_backup); + return rc; +} + +bool +VTMD_rename::move_table(THD *thd, SString_fs &table_name, LString &new_db) +{ + handlerton *table_hton= NULL; + if (!ha_table_exists(thd, about.db, table_name, &table_hton) || !table_hton) + { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_VERS_VTMD_ERROR, + "`%s.%s` archive doesn't exist", + about.db, table_name.ptr()); + return false; + } + + if (ha_table_exists(thd, new_db, table_name)) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` archive already exists!", MYF(0), + new_db.ptr(), table_name.ptr()); + return true; + } + + TABLE_LIST tl; + tl.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(table_name), + table_name, + TL_WRITE_ONLY); + tl.mdl_request.set_type(MDL_EXCLUSIVE); + + mysql_ha_rm_tables(thd, &tl); + if (lock_table_names(thd, &tl, 0, thd->variables.lock_wait_timeout, 0)) + return true; + tdc_remove_table(thd, TDC_RT_REMOVE_ALL, about.db, table_name, false); + + bool rc= mysql_rename_table( + table_hton, + about.db, table_name, + new_db, table_name, + NO_FK_CHECKS); + if (!rc) + query_cache_invalidate3(thd, &tl, 0); + + return rc; +} + +bool +VTMD_rename::try_rename(THD *thd, LString new_db, LString new_alias, const char *archive_name) +{ + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + TABLE_LIST new_table; + + if (check_exists(thd)) + return true; + + new_table.init_one_table( + XSTRING_WITH_LEN(new_db), + XSTRING_WITH_LEN(new_alias), + new_alias, TL_READ); + + if (new_table.vers_vtmd_name(vtmd_new_name)) + return true; + + if (ha_table_exists(thd, new_db, vtmd_new_name)) + { + if (exists) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` table already exists!", MYF(0), + new_db.ptr(), vtmd_new_name.ptr()); + return true; + } + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_VERS_VTMD_ERROR, + "`%s.%s` table already exists!", + new_db.ptr(), vtmd_new_name.ptr()); + return false; + } + + if (!exists) + return false; + + bool same_db= true; + if (LString_fs(DB_WITH_LEN(about)) != LString_fs(new_db)) + { + // Move archives before VTMD so if the operation is interrupted, it could be continued. + if (move_archives(thd, new_db)) + return true; + same_db= false; + } + + TABLE_LIST vtmd_tl; + vtmd_tl.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_name), + vtmd_name, + TL_WRITE_ONLY); + vtmd_tl.mdl_request.set_type(MDL_EXCLUSIVE); + + mysql_ha_rm_tables(thd, &vtmd_tl); + if (lock_table_names(thd, &vtmd_tl, 0, thd->variables.lock_wait_timeout, 0)) + return true; + tdc_remove_table(thd, TDC_RT_REMOVE_ALL, about.db, vtmd_name, false); + if (local_da.is_error()) // just safety check + return true; + bool rc= mysql_rename_table(hton, + about.db, vtmd_name, + new_db, vtmd_new_name, + NO_FK_CHECKS); + if (!rc) + { + query_cache_invalidate3(thd, &vtmd_tl, 0); + if (same_db || archive_name || new_alias != LString(TABLE_NAME_WITH_LEN(about))) + { + local_da.finish(); + VTMD_table new_vtmd(new_table); + rc= new_vtmd.update(thd, archive_name); + } + } + return rc; +} + +bool +VTMD_rename::revert_rename(THD *thd, LString new_db) +{ + DBUG_ASSERT(hton); + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + + TABLE_LIST vtmd_tl; + vtmd_tl.init_one_table( + DB_WITH_LEN(about), + XSTRING_WITH_LEN(vtmd_new_name), + vtmd_new_name, + TL_WRITE_ONLY); + vtmd_tl.mdl_request.set_type(MDL_EXCLUSIVE); + mysql_ha_rm_tables(thd, &vtmd_tl); + if (lock_table_names(thd, &vtmd_tl, 0, thd->variables.lock_wait_timeout, 0)) + return true; + tdc_remove_table(thd, TDC_RT_REMOVE_ALL, new_db, vtmd_new_name, false); + + bool rc= mysql_rename_table( + hton, + new_db, vtmd_new_name, + new_db, vtmd_name, + NO_FK_CHECKS); + + if (!rc) + query_cache_invalidate3(thd, &vtmd_tl, 0); + + return rc; +} + +void +VTMD_table::archive_name( + THD* thd, + const char* table_name, + char* new_name, + size_t new_name_size) +{ + const MYSQL_TIME now= thd->query_start_TIME(); + my_snprintf(new_name, new_name_size, "%s_%04d%02d%02d_%02d%02d%02d_%06d", + table_name, now.year, now.month, now.day, now.hour, now.minute, + now.second, now.second_part); +} + +bool +VTMD_table::find_archive_name(THD *thd, String &out) +{ + READ_RECORD info; + int error; + SQL_SELECT *select= NULL; + COND *conds= NULL; + List<TABLE_LIST> dummy; + SELECT_LEX &select_lex= thd->lex->select_lex; + + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + if (open(thd, local_da)) + return true; + + Name_resolution_context &ctx= thd->lex->select_lex.context; + TABLE_LIST *table_list= ctx.table_list; + TABLE_LIST *first_name_resolution_table= ctx.first_name_resolution_table; + table_map map = vtmd.table->map; + ctx.table_list= &vtmd; + ctx.first_name_resolution_table= &vtmd; + vtmd.table->map= 1; + + vtmd.vers_conditions= about.vers_conditions; + if ((error= select_lex.vers_setup_conds(thd, &vtmd, &conds)) || + (error= setup_conds(thd, &vtmd, dummy, &conds))) + goto err; + + select= make_select(vtmd.table, 0, 0, conds, NULL, 0, &error); + if (error) + goto loc_err; + + error= init_read_record(&info, thd, vtmd.table, select, NULL, + 1 /* use_record_cache */, true /* print_error */, + false /* disable_rr_cache */); + if (error) + goto loc_err; + + while (!(error= info.read_record()) && !thd->killed && !thd->is_error()) + { + if (!select || select->skip_record(thd) > 0) + { + vtmd.table->field[FLD_ARCHIVE_NAME]->val_str(&out); + break; + } + } + + if (error < 0) + my_error(ER_NO_SUCH_TABLE, MYF(0), about.db, about.alias); + +loc_err: + end_read_record(&info); +err: + delete select; + ctx.table_list= table_list; + ctx.first_name_resolution_table= first_name_resolution_table; + vtmd.table->map= map; + close_log_table(thd, &open_tables_backup); + DBUG_ASSERT(!error || local_da.is_error()); + return error; +} + +static +bool +get_vtmd_tables(THD *thd, const char *db, + size_t db_length, Dynamic_array<LEX_CSTRING *> &table_names) +{ + LOOKUP_FIELD_VALUES lookup_field_values= { + {db, db_length}, {C_STRING_WITH_LEN("%_vtmd")}, false, true}; + + int res= make_table_name_list(thd, &table_names, thd->lex, &lookup_field_values, + &lookup_field_values.db_value); + + return res; +} + +bool +VTMD_table::get_archive_tables(THD *thd, const char *db, size_t db_length, + Dynamic_array<String> &result) +{ + Dynamic_array<LEX_CSTRING *> vtmd_tables; + if (get_vtmd_tables(thd, db, db_length, vtmd_tables)) + return true; + + Local_da local_da(thd, ER_VERS_VTMD_ERROR); + for (uint i= 0; i < vtmd_tables.elements(); i++) + { + LEX_CSTRING table_name= *vtmd_tables.at(i); + + Open_tables_backup open_tables_backup; + TABLE_LIST table_list; + table_list.init_one_table(db, db_length, LEX_STRING_WITH_LEN(table_name), + table_name.str, TL_READ); + + TABLE *table= open_log_table(thd, &table_list, &open_tables_backup); + if (!table || !table->vers_vtmd()) + { + if (table) + close_log_table(thd, &open_tables_backup); + else + { + if (local_da.is_error() && local_da.sql_errno() == ER_NOT_LOG_TABLE) + local_da.reset_diagnostics_area(); + else + return true; + } + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_VERS_VTMD_ERROR, + "Table `%s.%s` is not a VTMD table", + db, table_name.str); + continue; + } + + READ_RECORD read_record; + int error= 0; + SQL_SELECT *sql_select= make_select(table, 0, 0, NULL, NULL, 0, &error); + if (error) + { + close_log_table(thd, &open_tables_backup); + return true; + } + error= init_read_record(&read_record, thd, table, sql_select, NULL, 1, 1, false); + if (error) + { + delete sql_select; + close_log_table(thd, &open_tables_backup); + return true; + } + + while (!(error= read_record.read_record())) + { + Field *field= table->field[FLD_ARCHIVE_NAME]; + if (field->is_null()) + continue; + + String archive_name; + field->val_str(&archive_name); + archive_name.set_ascii(strmake_root(thd->mem_root, archive_name.c_ptr(), + archive_name.length()), + archive_name.length()); + result.push(archive_name); + } + // check for EOF + if (!thd->is_error()) + error= 0; + + end_read_record(&read_record); + delete sql_select; + close_log_table(thd, &open_tables_backup); + } + + return false; +} + +bool VTMD_table::setup_select(THD* thd) +{ + SString archive_name; + if (find_archive_name(thd, archive_name)) + return true; + + if (archive_name.length() == 0) + return false; + + about.table_name= (char *) thd->memdup(archive_name.c_ptr_safe(), archive_name.length() + 1); + about.table_name_length= archive_name.length(); + DBUG_ASSERT(!about.mdl_request.ticket); + about.mdl_request.init(MDL_key::TABLE, about.db, about.table_name, + about.mdl_request.type, about.mdl_request.duration); + about.vers_force_alias= true; + // Since we modified SELECT_LEX::table_list, we need to invalidate current SP + if (thd->spcont) + { + DBUG_ASSERT(thd->spcont->m_sp); + thd->spcont->m_sp->set_sp_cache_version(ULONG_MAX); + } + return false; +} diff --git a/sql/vtmd.h b/sql/vtmd.h new file mode 100644 index 00000000000..d77ed117248 --- /dev/null +++ b/sql/vtmd.h @@ -0,0 +1,187 @@ +#ifndef VTMD_INCLUDED +#define VTMD_INCLUDED + +#include <mysqld_error.h> + +#include "mariadb.h" +#include "sql_priv.h" + +#include "my_sys.h" +#include "table.h" +#include "unireg.h" + +#include "vers_utils.h" + +class key_buf_t +{ + uchar* buf; + + key_buf_t(const key_buf_t&); // disabled + key_buf_t& operator= (const key_buf_t&); // disabled + +public: + key_buf_t() : buf(NULL) + {} + + ~key_buf_t() + { + if (buf) + my_free(buf); + } + + bool allocate(size_t alloc_size) + { + DBUG_ASSERT(!buf); + buf= static_cast<uchar *>(my_malloc(alloc_size, MYF(0))); + if (!buf) + { + my_message(ER_VERS_VTMD_ERROR, "failed to allocate key buffer", MYF(0)); + return true; + } + return false; + } + + operator uchar* () + { + DBUG_ASSERT(buf); + return reinterpret_cast<uchar *>(buf); + } +}; + +class THD; + +class VTMD_table +{ + Open_tables_backup open_tables_backup; + +protected: + TABLE_LIST vtmd; + TABLE_LIST &about; + SString_t vtmd_name; + +private: + VTMD_table(const VTMD_table&); // prohibit copying references + +public: + enum { + FLD_START= 0, + FLD_END, + FLD_NAME, + FLD_ARCHIVE_NAME, + FLD_COL_RENAMES, + FIELD_COUNT + }; + + enum { + IDX_TRX_END= 0, + IDX_ARCHIVE_NAME + }; + + VTMD_table(TABLE_LIST &_about) : + about(_about) + { + vtmd.table= NULL; + } + + bool create(THD *thd); + bool find_record(ulonglong sys_trx_end, bool &found); + bool open(THD *thd, Local_da &local_da, bool *created= NULL); + bool update(THD *thd, const char* archive_name= NULL); + bool setup_select(THD *thd); + + static void archive_name(THD *thd, const char *table_name, char *new_name, size_t new_name_size); + void archive_name(THD *thd, char *new_name, size_t new_name_size) + { + archive_name(thd, about.table_name, new_name, new_name_size); + } + + bool find_archive_name(THD *thd, String &out); + static bool get_archive_tables(THD *thd, const char *db, size_t db_length, + Dynamic_array<String> &result); +}; + +class VTMD_exists : public VTMD_table +{ +protected: + handlerton *hton; + +public: + bool exists; + +public: + VTMD_exists(TABLE_LIST &_about) : + VTMD_table(_about), + hton(NULL), + exists(false) + {} + + bool check_exists(THD *thd); // returns error status +}; + +class VTMD_rename : public VTMD_exists +{ + SString_t vtmd_new_name; + +public: + VTMD_rename(TABLE_LIST &_about) : + VTMD_exists(_about) + {} + + bool try_rename(THD *thd, LString new_db, LString new_alias, const char* archive_name= NULL); + bool revert_rename(THD *thd, LString new_db); + +private: + bool move_archives(THD *thd, LString &new_db); + bool move_table(THD *thd, SString_fs &table_name, LString &new_db); +}; + +class VTMD_drop : public VTMD_exists +{ + char archive_name_[NAME_CHAR_LEN]; + +public: + VTMD_drop(TABLE_LIST &_about) : + VTMD_exists(_about) + { + *archive_name_= 0; + } + + const char* archive_name(THD *thd) + { + VTMD_table::archive_name(thd, archive_name_, sizeof(archive_name_)); + return archive_name_; + } + + const char* archive_name() const + { + DBUG_ASSERT(*archive_name_); + return archive_name_; + } + + bool update(THD *thd) + { + DBUG_ASSERT(*archive_name_); + return VTMD_exists::update(thd, archive_name_); + } +}; + + +inline +bool +VTMD_exists::check_exists(THD *thd) +{ + if (about.vers_vtmd_name(vtmd_name)) + return true; + + exists= ha_table_exists(thd, about.db, vtmd_name, &hton); + + if (exists && !hton) + { + my_printf_error(ER_VERS_VTMD_ERROR, "`%s.%s` handlerton empty!", MYF(0), + about.db, vtmd_name.ptr()); + return true; + } + return false; +} + +#endif // VTMD_INCLUDED |