diff options
| author | Aleksey Midenkov <midenok@gmail.com> | 2019-07-21 23:30:49 +0300 |
|---|---|---|
| committer | Aleksey Midenkov <midenok@gmail.com> | 2021-12-17 20:33:35 +0300 |
| commit | 86a5c6fcfe7347622337cec7ae963f7c387f1296 (patch) | |
| tree | f6ae4dbf24cd9beb1baec4d440fda2b77dfe5b22 /sql | |
| parent | 6208228b78917bff13b5dc34428b38596f7404b4 (diff) | |
| download | mariadb-git-preview-10.8-MDEV-17554-auto-create-partition.tar.gz | |
MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT [preview-10.8-MDEV-17554-auto-create-partition]
== Syntax change ==
Keyword AUTO enables partition auto-creation.
create table t1 (x int) with system versioning
partition by system_time interval 1 hour auto;
create table t1 (x int) with system versioning
partition by system_time interval 1 month
starts '2021-01-01 00:00:00' auto partitions 12;
create table t1 (x int) with system versioning
partition by system_time limit 1000 auto;
Or with explicit partitions:
create table t1 (x int) with system versioning
partition by system_time interval 1 hour auto
(partition p0 history, partition pn current);
== Description ==
Before executing a history-generating DML command, add N history
partitions, so that N is sufficient for the history that may be
generated. N > 1 may be required when history is rotated by INTERVAL and
the timestamp has jumped into the future by more than the interval value.
If the last history partition exceeds LIMIT records before the DML
command, a new history partition is created and history filling is
switched to the new partition. Thus LIMIT is not a strict limit, and
the history partition size must be planned as LIMIT records plus the
average number of history rows one DML command generates.
Auto-creation is implemented by synchronous
fast_alter_partition_table() call from the thread of the executed DML
command before the command itself (by the fallback and retry mechanism
similar to Discovery feature, see Open_table_context).
The names for newly added partitions are generated like default
partition names, with the extension from MDEV-22155 (which avoids name
clashes by extending the assignment counter to the next free-enough gap).
These DML commands trigger auto-creation:
* DELETE (including multi-delete, excluding DELETE HISTORY)
* UPDATE (including multi-update)
* REPLACE (including REPLACE .. SELECT)
* INSERT .. ON DUPLICATE KEY UPDATE
* LOAD DATA .. REPLACE
MDEV-23642 Locking timeout caused by auto-creation affects original DML
A lock wait timeout during auto-creation does not fail the original DML
command. The reasons for this are:
- Do not disrupt main business process (the history is auxiliary
service);
- Consequences are non-fatal (history is not lost, but goes into the wrong
partition; this is fixed by rebuilding the partitioning);
- The application has more freedom to decide whether to fail in this case
or not: it may read the warning info and find the corresponding error number.
- While a non-failing command is easy for an application to handle and
fail if it wants to, the opposite is hard to handle: there are no
automatic actions to fix a failed command and retry it, DBA intervention
is required, and until then the application is non-functioning.
MDEV-23639 Auto-create does not work under LOCK TABLES or inside triggers
Don't do tdc_remove_table() for OT_ADD_HISTORY_PARTITION because it is
not possible in locked tables mode.
LTM_LOCK_TABLES mode (and LTM_PRELOCKED_UNDER_LOCK_TABLES) works out
of the box as fast_alter_partition_table() can reopen tables via
locked_tables_list.
In LTM_PRELOCKED we reopen and relock table manually.
More fixes
* some_table_marked_for_reopen flag fix
some_table_marked_for_reopen affects only the reopening of
m_locked_tables, i.e. Locked_tables_list::reopen_tables() reopens only
tables from m_locked_tables.
* Unused can_recover_from_failed_open() condition
Can recover_from_failed_open() really be used after
open_and_process_routine()?
Diffstat (limited to 'sql')
| -rw-r--r-- | sql/ha_partition.cc | 20 | ||||
| -rw-r--r-- | sql/ha_partition.h | 9 | ||||
| -rw-r--r-- | sql/handler.cc | 3 | ||||
| -rw-r--r-- | sql/handler.h | 7 | ||||
| -rw-r--r-- | sql/lock.cc | 32 | ||||
| -rw-r--r-- | sql/lock.h | 2 | ||||
| -rw-r--r-- | sql/log_event.cc | 13 | ||||
| -rw-r--r-- | sql/log_event.h | 9 | ||||
| -rw-r--r-- | sql/log_event_server.cc | 22 | ||||
| -rw-r--r-- | sql/partition_info.cc | 210 | ||||
| -rw-r--r-- | sql/partition_info.h | 23 | ||||
| -rw-r--r-- | sql/rpl_rli.h | 2 | ||||
| -rw-r--r-- | sql/share/errmsg-utf8.txt | 4 | ||||
| -rw-r--r-- | sql/sql_base.cc | 285 | ||||
| -rw-r--r-- | sql/sql_base.h | 6 | ||||
| -rw-r--r-- | sql/sql_class.cc | 4 | ||||
| -rw-r--r-- | sql/sql_class.h | 8 | ||||
| -rw-r--r-- | sql/sql_lex.cc | 6 | ||||
| -rw-r--r-- | sql/sql_parse.cc | 3 | ||||
| -rw-r--r-- | sql/sql_partition.cc | 36 | ||||
| -rw-r--r-- | sql/sql_truncate.cc | 2 | ||||
| -rw-r--r-- | sql/sql_yacc.yy | 30 | ||||
| -rw-r--r-- | sql/table.cc | 2 | ||||
| -rw-r--r-- | sql/table.h | 19 | ||||
| -rw-r--r-- | sql/transaction.cc | 2 |
25 files changed, 629 insertions, 130 deletions
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index f17abed82ff..9ab9f2a9552 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -4097,18 +4097,9 @@ int ha_partition::external_lock(THD *thd, int lock_type) (void) (*file)->ha_external_lock(thd, lock_type); } while (*(++file)); } - if (lock_type == F_WRLCK) - { - if (m_part_info->part_expr) - m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0); - if (m_part_info->part_type == VERSIONING_PARTITION && - /* TODO: MDEV-20345 exclude more inapproriate commands like INSERT - These commands may be excluded because working history partition is needed - only for versioned DML. */ - thd->lex->sql_command != SQLCOM_SELECT && - thd->lex->sql_command != SQLCOM_INSERT_SELECT) - m_part_info->vers_set_hist_part(thd); - } + if (lock_type == F_WRLCK && m_part_info->part_expr) + m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0); + DBUG_RETURN(0); err_handler: @@ -4252,11 +4243,6 @@ int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type) { if (m_part_info->part_expr) m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0); - if (m_part_info->part_type == VERSIONING_PARTITION && - // TODO: MDEV-20345 (see above) - thd->lex->sql_command != SQLCOM_SELECT && - thd->lex->sql_command != SQLCOM_INSERT_SELECT) - m_part_info->vers_set_hist_part(thd); } DBUG_RETURN(error); } diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 14f68b36c0b..90c556635a3 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -1608,6 +1608,13 @@ public: } bool partition_engine() override { return 1;} + + /** + Get the number of records in part_elem and its subpartitions, if any. + Also sets read_partitions bit for each partition id it uses (that is needed + for vers_set_hist_part() because it is called before read_partitions bitmap + is initialized). 
+ */ ha_rows part_records(partition_element *part_elem) { DBUG_ASSERT(m_part_info); @@ -1619,7 +1626,7 @@ public: for (; part_id < part_id_end; ++part_id) { handler *file= m_file[part_id]; - DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id)); + bitmap_set_bit(&(m_part_info->read_partitions), part_id); file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN); part_recs+= file->stats.records; } diff --git a/sql/handler.cc b/sql/handler.cc index 57992d98c04..cf3abe06f38 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1635,9 +1635,10 @@ int ha_commit_trans(THD *thd, bool all) DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL || trans == &thd->transaction->stmt); + DBUG_ASSERT(!thd->in_sub_stmt); + if (thd->in_sub_stmt) { - DBUG_ASSERT(0); /* Since we don't support nested statement transactions in 5.0, we can't commit or rollback stmt transactions while we are inside diff --git a/sql/handler.h b/sql/handler.h index fe61666bf20..40f5eb0f1ab 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -832,6 +832,8 @@ typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*); #define ALTER_PARTITION_TABLE_REORG (1ULL << 12) #define ALTER_PARTITION_CONVERT_IN (1ULL << 13) #define ALTER_PARTITION_CONVERT_OUT (1ULL << 14) +// Set for vers_add_auto_hist_parts() operation +#define ALTER_PARTITION_AUTO_HIST (1ULL << 15) /* This is master database for most of system tables. 
However there @@ -2148,7 +2150,6 @@ struct Vers_parse_info: public Table_period_info Table_period_info::start_end_t as_row; -protected: friend struct Table_scope_and_contents_source_st; void set_start(const LEX_CSTRING field_name) { @@ -2160,6 +2161,8 @@ protected: as_row.end= field_name; period.end= field_name; } + +protected: bool is_start(const char *name) const; bool is_end(const char *name) const; bool is_start(const Create_field &f) const; @@ -4257,6 +4260,8 @@ public: */ virtual uint lock_count(void) const { return 1; } /** + Get the lock(s) for the table and perform conversion of locks if needed. + Is not invoked for non-transactional temporary tables. @note store_lock() can return more than one lock if the table is MERGE diff --git a/sql/lock.cc b/sql/lock.cc index 2eba5df35f1..bfa3e7baa0f 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -654,7 +654,7 @@ bool mysql_lock_abort_for_thread(THD *thd, TABLE *table) a and b are freed with my_free() */ -MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b) +MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a, MYSQL_LOCK *b, THD *thd) { MYSQL_LOCK *sql_lock; TABLE **table, **end_table; @@ -662,16 +662,28 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b) DBUG_PRINT("enter", ("a->lock_count: %u b->lock_count: %u", a->lock_count, b->lock_count)); - if (!(sql_lock= (MYSQL_LOCK*) - my_malloc(key_memory_MYSQL_LOCK, sizeof(*sql_lock) + - sizeof(THR_LOCK_DATA*)*((a->lock_count+b->lock_count)*2) + - sizeof(TABLE*)*(a->table_count+b->table_count),MYF(MY_WME)))) - DBUG_RETURN(0); // Fatal error + const size_t lock_size= sizeof(*sql_lock) + + sizeof(THR_LOCK_DATA *) * ((a->lock_count + b->lock_count) * 2) + + sizeof(TABLE *) * (a->table_count + b->table_count); + if (thd) + { + sql_lock= (MYSQL_LOCK *) thd->alloc(lock_size); + if (!sql_lock) + DBUG_RETURN(0); + sql_lock->flags= GET_LOCK_ON_THD; + } + else + { + sql_lock= (MYSQL_LOCK *) + my_malloc(key_memory_MYSQL_LOCK, lock_size, MYF(MY_WME)); + if (!sql_lock) + 
DBUG_RETURN(0); + sql_lock->flags= 0; + } sql_lock->lock_count=a->lock_count+b->lock_count; sql_lock->table_count=a->table_count+b->table_count; sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1); sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count*2); - sql_lock->flags= 0; memcpy(sql_lock->locks,a->locks,a->lock_count*sizeof(*a->locks)); memcpy(sql_lock->locks+a->lock_count,b->locks, b->lock_count*sizeof(*b->locks)); @@ -705,8 +717,10 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b) a->lock_count, b->lock_count); /* Delete old, not needed locks */ - my_free(a); - my_free(b); + if (!(a->flags & GET_LOCK_ON_THD)) + my_free(a); + if (!(b->flags & GET_LOCK_ON_THD)) + my_free(b); DBUG_RETURN(sql_lock); } diff --git a/sql/lock.h b/sql/lock.h index 0b23ddd3846..85a93b9a7e3 100644 --- a/sql/lock.h +++ b/sql/lock.h @@ -34,7 +34,7 @@ int mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock); int mysql_unlock_some_tables(THD *thd, TABLE **table,uint count, uint flag); int mysql_lock_remove(THD *thd, MYSQL_LOCK *locked,TABLE *table); bool mysql_lock_abort_for_thread(THD *thd, TABLE *table); -MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b); +MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a, MYSQL_LOCK *b, THD *thd= NULL); /* Lock based on name */ bool lock_schema_name(THD *thd, const char *db); /* Lock based on stored routine name */ diff --git a/sql/log_event.cc b/sql/log_event.cc index afca79b008a..5206b1834fe 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -4108,3 +4108,16 @@ bool copy_event_cache_to_file_and_reinit(IO_CACHE *cache, FILE *file) return (my_b_copy_all_to_file(cache, file) || reinit_io_cache(cache, WRITE_CACHE, 0, FALSE, TRUE)); } + +#if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) +int Log_event::apply_event(rpl_group_info* rgi) +{ + int res; + THD_STAGE_INFO(thd, stage_apply_event); + rgi->current_event= this; + res= do_apply_event(rgi); + rgi->current_event= NULL; + THD_STAGE_INFO(thd, stage_after_apply_event); + 
return res; +} +#endif diff --git a/sql/log_event.h b/sql/log_event.h index 3adc7a26d93..e2479724972 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1509,14 +1509,7 @@ public: @see do_apply_event */ - int apply_event(rpl_group_info *rgi) - { - int res; - THD_STAGE_INFO(thd, stage_apply_event); - res= do_apply_event(rgi); - THD_STAGE_INFO(thd, stage_after_apply_event); - return res; - } + int apply_event(rpl_group_info *rgi); /** diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 2e4992a021e..179d1f067f0 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -5475,6 +5475,18 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) lex->query_tables_last= &tables->next_global; } } + + /* + It is needed to set_time(): + 1) it continues the property that "Time" in SHOW PROCESSLIST shows how + much slave is behind + 2) it will be needed when we allow replication from a table with no + TIMESTAMP column to a table with one. + So we call set_time(), like in SBR. Presently it changes nothing. + 3) vers_set_hist_part() requires proper query time. + */ + thd->set_time(when, when_sec_part); + if (unlikely(open_and_lock_tables(thd, rgi->tables_to_lock, FALSE, 0))) { #ifdef WITH_WSREP @@ -5651,16 +5663,6 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) which tested replicate-* rules). */ - /* - It's not needed to set_time() but - 1) it continues the property that "Time" in SHOW PROCESSLIST shows how - much slave is behind - 2) it will be needed when we allow replication from a table with no - TIMESTAMP column to a table with one. - So we call set_time(), like in SBR. Presently it changes nothing. 
- */ - thd->set_time(when, when_sec_part); - if (m_width == table->s->fields && bitmap_is_set_all(&m_cols)) set_flags(COMPLETE_ROWS_F); diff --git a/sql/partition_info.cc b/sql/partition_info.cc index 62a3092f369..23194e43d06 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -38,6 +38,8 @@ #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" +#include "sql_table.h" +#include "transaction.h" partition_info *partition_info::get_clone(THD *thd) @@ -324,13 +326,11 @@ bool partition_info::set_partition_bitmaps_from_table(TABLE_LIST *table_list) The external routine needing this code is check_partition_info */ -#define MAX_PART_NAME_SIZE 8 - char *partition_info::create_default_partition_names(THD *thd, uint part_no, uint num_parts_arg, uint start_no) { - char *ptr= (char*) thd->calloc(num_parts_arg * MAX_PART_NAME_SIZE); + char *ptr= (char*) thd->calloc(num_parts_arg * MAX_PART_NAME_SIZE + 1); char *move_ptr= ptr; uint i= 0; DBUG_ENTER("create_default_partition_names"); @@ -819,10 +819,15 @@ bool partition_info::has_unique_name(partition_element *element) vers_info->interval Limit by fixed time interval vers_info->hist_part (out) Working history partition */ -void partition_info::vers_set_hist_part(THD *thd) +bool partition_info::vers_set_hist_part(THD *thd, uint *create_count) { + DBUG_ASSERT(!thd->lex->last_table() || + !thd->lex->last_table()->vers_conditions.delete_history); + const bool auto_hist= create_count && vers_info->auto_hist; + if (vers_info->limit) { + DBUG_ASSERT(!vers_info->interval.is_set()); ha_partition *hp= (ha_partition*)(table->file); partition_element *next= NULL; List_iterator<partition_element> it(partitions); @@ -841,22 +846,22 @@ void partition_info::vers_set_hist_part(THD *thd) { if (next == vers_info->now_part) { - my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG), - table->s->db.str, table->s->table_name.str, - vers_info->hist_part->partition_name, "LIMIT"); + if (auto_hist) + *create_count= 1; + else + 
my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG), + table->s->db.str, table->s->table_name.str, + vers_info->hist_part->partition_name, "LIMIT"); } else vers_info->hist_part= next; } - return; } - - if (vers_info->interval.is_set()) + else if (vers_info->interval.is_set() && + vers_info->hist_part->range_value <= thd->query_start()) { - if (vers_info->hist_part->range_value > thd->query_start()) - return; - partition_element *next= NULL; + bool error= true; List_iterator<partition_element> it(partitions); while (next != vers_info->hist_part) next= it++; @@ -865,9 +870,166 @@ void partition_info::vers_set_hist_part(THD *thd) { vers_info->hist_part= next; if (next->range_value > thd->query_start()) - return; + { + error= false; + break; + } + } + if (error) + { + if (auto_hist) + { + *create_count= 0; + const my_time_t hist_end= (my_time_t) vers_info->hist_part->range_value; + DBUG_ASSERT(thd->query_start() >= hist_end); + MYSQL_TIME h0, q0; + my_tz_OFFSET0->gmt_sec_to_TIME(&h0, hist_end); + my_tz_OFFSET0->gmt_sec_to_TIME(&q0, thd->query_start()); + longlong q= pack_time(&q0); + longlong h= pack_time(&h0); + while (h <= q) + { + if (date_add_interval(thd, &h0, vers_info->interval.type, + vers_info->interval.step)) + return true; + h= pack_time(&h0); + ++*create_count; + if (*create_count == MAX_PARTITIONS - 2) + { + my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(ME_WARNING)); + my_error(ER_VERS_HIST_PART_FAILED, MYF(0), + table->s->db.str, table->s->table_name.str); + return true; + } + } + } + else + { + my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG), + table->s->db.str, table->s->table_name.str, + vers_info->hist_part->partition_name, "INTERVAL"); + } + } + } + + return false; +} + + +/** + @brief Run fast_alter_partition_table() to add new history partitions + for tables requiring them. 
+*/ +bool vers_create_partitions(THD *thd, TABLE_LIST* tl, uint num_parts) +{ + bool result= true; + HA_CREATE_INFO create_info; + Alter_info alter_info; + partition_info *save_part_info= thd->work_part_info; + Query_tables_list save_query_tables; + Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer; + bool save_no_write_to_binlog= thd->lex->no_write_to_binlog; + thd->m_reprepare_observer= NULL; + thd->lex->reset_n_backup_query_tables_list(&save_query_tables); + thd->lex->no_write_to_binlog= true; + TABLE *table= tl->table; + + DBUG_ASSERT(!thd->is_error()); + + { + DBUG_ASSERT(table->s->get_table_ref_type() == TABLE_REF_BASE_TABLE); + DBUG_ASSERT(table->versioned()); + DBUG_ASSERT(table->part_info); + DBUG_ASSERT(table->part_info->vers_info); + alter_info.reset(); + alter_info.partition_flags= ALTER_PARTITION_ADD|ALTER_PARTITION_AUTO_HIST; + create_info.init(); + create_info.alter_info= &alter_info; + Alter_table_ctx alter_ctx(thd, tl, 1, &table->s->db, &table->s->table_name); + + MDL_REQUEST_INIT(&tl->mdl_request, MDL_key::TABLE, tl->db.str, + tl->table_name.str, MDL_SHARED_NO_WRITE, MDL_TRANSACTION); + if (thd->mdl_context.acquire_lock(&tl->mdl_request, + thd->variables.lock_wait_timeout)) + goto exit; + table->mdl_ticket= tl->mdl_request.ticket; + + create_info.db_type= table->s->db_type(); + create_info.options|= HA_VERSIONED_TABLE; + DBUG_ASSERT(create_info.db_type); + + create_info.vers_info.set_start(table->s->vers_start_field()->field_name); + create_info.vers_info.set_end(table->s->vers_end_field()->field_name); + + partition_info *part_info= new partition_info(); + if (unlikely(!part_info)) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto exit; + } + part_info->use_default_num_partitions= false; + part_info->use_default_num_subpartitions= false; + part_info->num_parts= num_parts; + part_info->num_subparts= table->part_info->num_subparts; + part_info->subpart_type= table->part_info->subpart_type; + if 
(unlikely(part_info->vers_init_info(thd))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + goto exit; + } + + thd->work_part_info= part_info; + if (part_info->set_up_defaults_for_partitioning(thd, table->file, NULL, + table->part_info->next_part_no(num_parts))) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + bool partition_changed= false; + bool fast_alter_partition= false; + if (prep_alter_part_table(thd, table, &alter_info, &create_info, + &partition_changed, &fast_alter_partition)) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + if (!fast_alter_partition) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + DBUG_ASSERT(partition_changed); + if (mysql_prepare_alter_table(thd, table, &create_info, &alter_info, + &alter_ctx)) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; + } + + if (fast_alter_partition_table(thd, table, &alter_info, &alter_ctx, + &create_info, tl)) + { + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), + tl->db.str, tl->table_name.str); + goto exit; } } + + result= false; + // NOTE: we have to return DA_EMPTY for new command + DBUG_ASSERT(thd->get_stmt_da()->is_ok()); + thd->get_stmt_da()->reset_diagnostics_area(); + +exit: + thd->work_part_info= save_part_info; + thd->m_reprepare_observer= save_reprepare_observer; + thd->lex->restore_backup_query_tables_list(&save_query_tables); + thd->lex->no_write_to_binlog= save_no_write_to_binlog; + return result; } @@ -2642,13 +2804,14 @@ bool partition_info::vers_init_info(THD * thd) bool partition_info::vers_set_interval(THD* thd, Item* interval, interval_type int_type, Item* starts, - const char *table_name) + bool auto_hist, const char *table_name) { DBUG_ASSERT(part_type == VERSIONING_PARTITION); MYSQL_TIME ltime; uint err; vers_info->interval.type= int_type; 
+ vers_info->auto_hist= auto_hist; /* 1. assign INTERVAL to interval.step */ if (interval->fix_fields_if_needed_for_scalar(thd, &interval)) @@ -2730,6 +2893,23 @@ interval_starts_error: } +bool partition_info::vers_set_limit(ulonglong limit, bool auto_hist, + const char *table_name) +{ + DBUG_ASSERT(part_type == VERSIONING_PARTITION); + + if (limit < 1) + { + my_error(ER_PART_WRONG_VALUE, MYF(0), table_name, "LIMIT"); + return true; + } + + vers_info->limit= limit; + vers_info->auto_hist= auto_hist; + return !limit; +} + + bool partition_info::error_if_requires_values() const { switch (part_type) { diff --git a/sql/partition_info.h b/sql/partition_info.h index 995147d6766..cbd6ace8fa4 100644 --- a/sql/partition_info.h +++ b/sql/partition_info.h @@ -36,11 +36,11 @@ struct st_ddl_log_memory_entry; #define MAX_PART_NAME_SIZE 8 - struct Vers_part_info : public Sql_alloc { Vers_part_info() : limit(0), + auto_hist(false), now_part(NULL), hist_part(NULL) { @@ -49,6 +49,7 @@ struct Vers_part_info : public Sql_alloc Vers_part_info(Vers_part_info &src) : interval(src.interval), limit(src.limit), + auto_hist(src.auto_hist), now_part(NULL), hist_part(NULL) { @@ -72,9 +73,10 @@ struct Vers_part_info : public Sql_alloc my_time_t start; INTERVAL step; enum interval_type type; - bool is_set() { return type < INTERVAL_LAST; } + bool is_set() const { return type < INTERVAL_LAST; } } interval; ulonglong limit; + bool auto_hist; partition_element *now_part; partition_element *hist_part; }; @@ -393,14 +395,9 @@ public: bool vers_init_info(THD *thd); bool vers_set_interval(THD *thd, Item *interval, interval_type int_type, Item *starts, - const char *table_name); - bool vers_set_limit(ulonglong limit) - { - DBUG_ASSERT(part_type == VERSIONING_PARTITION); - vers_info->limit= limit; - return !limit; - } - void vers_set_hist_part(THD *thd); + bool auto_part, const char *table_name); + bool vers_set_limit(ulonglong limit, bool auto_part, const char *table_name); + bool 
vers_set_hist_part(THD* thd, uint *create_count); bool vers_fix_field_list(THD *thd); void vers_update_el_ids(); partition_element *get_partition(uint part_id) @@ -419,6 +416,7 @@ public: uint32 get_next_partition_id_range(struct st_partition_iter* part_iter); bool check_partition_dirs(partition_info *part_info); +bool vers_create_partitions(THD* thd, TABLE_LIST* tl, uint num_parts); /* Initialize the iterator to return a single partition with given part_id */ @@ -474,11 +472,6 @@ bool partition_info::vers_fix_field_list(THD * thd) } -/** - @brief Update partition_element's id - - @returns true on error; false on success -*/ inline void partition_info::vers_update_el_ids() { diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index cc807852bf2..056f484f3b0 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -668,6 +668,8 @@ struct rpl_group_info */ uint64 gtid_sub_id; rpl_gtid current_gtid; + /* Currently applied event or NULL */ + Log_event *current_event; uint64 commit_id; /* This is used to keep transaction commit order. diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index f31f21faeb0..a25705ff785 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7861,7 +7861,7 @@ ER_VERS_PERIOD_COLUMNS eng "PERIOD FOR SYSTEM_TIME must use columns %`s and %`s" ER_PART_WRONG_VALUE - eng "Wrong parameters for partitioned %`s: wrong value for '%s'" + eng "Wrong parameters for partitioned %`s: wrong value for %s" ER_VERS_WRONG_PARTS eng "Wrong partitions for %`s: must have at least one HISTORY and exactly one last CURRENT" @@ -7996,3 +7996,5 @@ ER_PARTITION_CONVERT_SUBPARTITIONED eng "Convert partition is not supported for subpartitioned table." 
ER_PROVIDER_NOT_LOADED eng "MariaDB tried to use the %s, but its provider plugin is not loaded" +ER_VERS_HIST_PART_FAILED + eng "Versioned table %`s.%`s: adding HISTORY partition(s) failed" diff --git a/sql/sql_base.cc b/sql/sql_base.cc index d983fe9a332..b3d717dfc07 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -63,7 +63,6 @@ #include "wsrep_trans_observer.h" #endif /* WITH_WSREP */ - bool No_such_table_error_handler::handle_condition(THD *, uint sql_errno, @@ -933,7 +932,7 @@ void close_thread_table(THD *thd, TABLE **table_ptr) DBUG_PRINT("tcache", ("table: '%s'.'%s' %p", table->s->db.str, table->s->table_name.str, table)); DBUG_ASSERT(!table->file->keyread_enabled()); - DBUG_ASSERT(!table->file || table->file->inited == handler::NONE); + DBUG_ASSERT(table->file->inited == handler::NONE); /* The metadata lock must be released after giving back @@ -945,11 +944,8 @@ void close_thread_table(THD *thd, TABLE **table_ptr) MDL_SHARED)); table->mdl_ticket= NULL; - if (table->file) - { - table->file->update_global_table_stats(); - table->file->update_global_index_stats(); - } + table->file->update_global_table_stats(); + table->file->update_global_index_stats(); /* This look is needed to allow THD::notify_shared_lock() to @@ -1627,6 +1623,138 @@ bool is_locked_view(THD *thd, TABLE_LIST *t) } +#ifdef WITH_PARTITION_STORAGE_ENGINE +/** + Switch part_info->hist_part and request partition creation if needed. + + @retval true Error or partition creation was requested. 
+ @retval false No error +*/ +bool TABLE::vers_switch_partition(THD *thd, TABLE_LIST *table_list, + Open_table_context *ot_ctx) +{ + if (!part_info || part_info->part_type != VERSIONING_PARTITION || + table_list->vers_conditions.delete_history || + thd->stmt_arena->is_stmt_prepare() || + table_list->lock_type < TL_WRITE_ALLOW_WRITE || + table_list->mdl_request.type < MDL_SHARED_WRITE || + table_list->mdl_request.type == MDL_EXCLUSIVE) + { + return false; + } + + /* + NOTE: we need this condition of prelocking_placeholder because we cannot do + auto-create after the transaction is started. Auto-create does + close_tables_for_reopen() and that is not possible under started transaction. + Also the transaction may not be cancelled at that moment: f.ex. trigger + after insert is run when some data is already written. + + We must do auto-creation for PRELOCK_ROUTINE tables at the initial + open_tables() no matter what initiating sql_command is. + */ + if (table_list->prelocking_placeholder != TABLE_LIST::PRELOCK_ROUTINE) + { + switch (thd->lex->sql_command) + { + case SQLCOM_INSERT: + if (thd->lex->duplicates != DUP_UPDATE) + return false; + break; + case SQLCOM_LOAD: + if (thd->lex->duplicates != DUP_REPLACE) + return false; + break; + case SQLCOM_LOCK_TABLES: + case SQLCOM_DELETE: + case SQLCOM_UPDATE: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE_MULTI: + break; + default: + /* + TODO: make row events set thd->lex->sql_command appropriately. + + Sergei Golubchik: f.ex. currently row events increment + thd->status_var.com_stat[] each event for its own SQLCOM_xxx, it won't be + needed if they'll just set thd->lex->sql_command. 
+ */ + if (thd->rgi_slave && thd->rgi_slave->current_event && + thd->lex->sql_command == SQLCOM_END) + { + switch (thd->rgi_slave->current_event->get_type_code()) + { + case UPDATE_ROWS_EVENT: + case UPDATE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT: + case DELETE_ROWS_EVENT_V1: + break; + default:; + return false; + } + } + break; + } + } + + TABLE *table= this; + + /* + NOTE: The semantics of vers_set_hist_part() is twofold: even when we + don't need auto-create, we need to update part_info->hist_part. + */ + uint *create_count= (table_list->vers_skip_create == thd->query_id) ? + NULL : &ot_ctx->vers_create_count; + table_list->vers_skip_create= thd->query_id; + if (table->part_info->vers_set_hist_part(thd, create_count)) + { + MYSQL_UNBIND_TABLE(table->file); + tc_release_table(table); + return true; + } + if (ot_ctx->vers_create_count) + { + Open_table_context::enum_open_table_action action; + TABLE_LIST *table_arg; + mysql_mutex_lock(&table->s->LOCK_share); + if (!table->s->vers_skip_auto_create) + { + table->s->vers_skip_auto_create= true; + action= Open_table_context::OT_ADD_HISTORY_PARTITION; + table_arg= table_list; + } + else + { + /* + NOTE: this may repeat multiple times until creating thread acquires + MDL_EXCLUSIVE. Since auto-creation is rare operation this is acceptable. + We could suspend this thread on cond-var but we must first exit + MDL_SHARED_WRITE first and we cannot store cond-var into TABLE_SHARE + because it is already released and there is no guarantee that it will + be same instance if we acquire it again. 
+ */ + table_list->vers_skip_create= 0; + ot_ctx->vers_create_count= 0; + action= Open_table_context::OT_REOPEN_TABLES; + table_arg= NULL; + } + mysql_mutex_unlock(&table->s->LOCK_share); + if (!thd->locked_tables_mode) + { + MYSQL_UNBIND_TABLE(table->file); + tc_release_table(table); + } + ot_ctx->request_backoff_action(action, table_arg); + return true; + } + + return false; +} +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + + /** Open a base table. @@ -1779,6 +1907,8 @@ bool open_table(THD *thd, TABLE_LIST *table_list, Open_table_context *ot_ctx) DBUG_PRINT("info",("Using locked table")); #ifdef WITH_PARTITION_STORAGE_ENGINE part_names_error= set_partitions_as_used(table_list, table); + if (table->vers_switch_partition(thd, table_list, ot_ctx)) + DBUG_RETURN(true); #endif goto reset; } @@ -2034,6 +2164,11 @@ retry_share: tc_add_table(thd, table); } +#ifdef WITH_PARTITION_STORAGE_ENGINE + if (table->vers_switch_partition(thd, table_list, ot_ctx)) + DBUG_RETURN(true); +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + if (!(flags & MYSQL_OPEN_HAS_MDL_LOCK) && table->s->table_category < TABLE_CATEGORY_INFORMATION) { @@ -2111,6 +2246,7 @@ retry_share: table->init(thd, table_list); + DBUG_ASSERT(table != thd->open_tables); table->next= thd->open_tables; /* Link into simple list */ thd->set_open_tables(table); @@ -2576,7 +2712,7 @@ unlink_all_closed_tables(THD *thd, MYSQL_LOCK *lock, size_t reopen_count) This is only needed when LOCK TABLES is active */ -void Locked_tables_list::mark_table_for_reopen(THD *thd, TABLE *table) +void Locked_tables_list::mark_table_for_reopen(TABLE *table) { TABLE_SHARE *share= table->s; @@ -2589,11 +2725,13 @@ void Locked_tables_list::mark_table_for_reopen(THD *thd, TABLE *table) close_all_tables_for_name(). 
*/ if (table_list->table && table_list->table->s == share) + { table_list->table->internal_set_needs_reopen(true); + some_table_marked_for_reopen= 1; + } } /* This is needed in the case where lock tables where not used */ table->internal_set_needs_reopen(true); - some_table_marked_for_reopen= 1; } @@ -3048,7 +3186,8 @@ Open_table_context::Open_table_context(THD *thd, uint flags) m_flags(flags), m_action(OT_NO_ACTION), m_has_locks(thd->mdl_context.has_locks()), - m_has_protection_against_grl(0) + m_has_protection_against_grl(0), + vers_create_count(0) {} @@ -3128,13 +3267,15 @@ request_backoff_action(enum_open_table_action action_arg, */ if (table) { - DBUG_ASSERT(action_arg == OT_DISCOVER || action_arg == OT_REPAIR); + DBUG_ASSERT(action_arg == OT_DISCOVER || action_arg == OT_REPAIR || + action_arg == OT_ADD_HISTORY_PARTITION); m_failed_table= (TABLE_LIST*) m_thd->alloc(sizeof(TABLE_LIST)); if (m_failed_table == NULL) return TRUE; m_failed_table->init_one_table(&table->db, &table->table_name, &table->alias, TL_WRITE); m_failed_table->open_strategy= table->open_strategy; m_failed_table->mdl_request.set_type(MDL_EXCLUSIVE); + m_failed_table->vers_skip_create= table->vers_skip_create; } m_action= action_arg; return FALSE; @@ -3195,13 +3336,49 @@ Open_table_context::recover_from_failed_open() break; case OT_DISCOVER: case OT_REPAIR: - if ((result= lock_table_names(m_thd, m_thd->lex->create_info, - m_failed_table, NULL, - get_timeout(), 0))) + case OT_ADD_HISTORY_PARTITION: + if (!m_thd->locked_tables_mode) + result= lock_table_names(m_thd, m_thd->lex->create_info, m_failed_table, + NULL, get_timeout(), 0); + else + { + DBUG_ASSERT(!result); + DBUG_ASSERT(m_action == OT_ADD_HISTORY_PARTITION); + } + /* + We are now under MDL_EXCLUSIVE mode. Other threads have no table share + acquired: they are blocked either at open_table_get_mdl_lock() in + open_table() or at lock_table_names() here. 
+ */ + if (result) + { + if (m_action == OT_ADD_HISTORY_PARTITION) + { + TABLE_SHARE *share= tdc_acquire_share(m_thd, m_failed_table, + GTS_TABLE, NULL); + if (share) + { + share->vers_skip_auto_create= false; + tdc_release_share(share); + } + if (m_thd->get_stmt_da()->sql_errno() == ER_LOCK_WAIT_TIMEOUT) + { + // MDEV-23642 Locking timeout caused by auto-creation affects original DML + m_thd->clear_error(); + vers_create_count= 0; + result= false; + } + } break; + } - tdc_remove_table(m_thd, m_failed_table->db.str, - m_failed_table->table_name.str); + /* + We don't need to remove share under OT_ADD_HISTORY_PARTITION. + Moreover fast_alter_partition_table() works with TABLE instance. + */ + if (m_action != OT_ADD_HISTORY_PARTITION) + tdc_remove_table(m_thd, m_failed_table->db.str, + m_failed_table->table_name.str); switch (m_action) { @@ -3229,6 +3406,70 @@ Open_table_context::recover_from_failed_open() case OT_REPAIR: result= auto_repair_table(m_thd, m_failed_table); break; + case OT_ADD_HISTORY_PARTITION: + { + result= false; + TABLE *table= open_ltable(m_thd, m_failed_table, TL_WRITE, + MYSQL_OPEN_HAS_MDL_LOCK | MYSQL_OPEN_IGNORE_LOGGING_FORMAT); + if (table == NULL) + { + m_thd->clear_error(); + break; + } + + DBUG_ASSERT(vers_create_count); + result= vers_create_partitions(m_thd, m_failed_table, vers_create_count); + vers_create_count= 0; + if (!m_thd->transaction->stmt.is_empty()) + trans_commit_stmt(m_thd); + DBUG_ASSERT(!result || + !m_thd->locked_tables_mode || + m_thd->lock->lock_count); + if (result) + break; + if (!m_thd->locked_tables_mode) + { + /* + alter_partition_lock_handling() does mysql_lock_remove() but + does not clear thd->lock completely. 
+ */ + DBUG_ASSERT(m_thd->lock->lock_count == 0); + if (!(m_thd->lock->flags & GET_LOCK_ON_THD)) + my_free(m_thd->lock); + m_thd->lock= NULL; + } + else if (m_thd->locked_tables_mode == LTM_PRELOCKED) + { + MYSQL_LOCK *lock; + MYSQL_LOCK *merged_lock; + + /* + In LTM_LOCK_TABLES table was reopened via locked_tables_list, + but not in prelocked environment where we have to reopen + the table manually. + */ + Open_table_context ot_ctx(m_thd, MYSQL_OPEN_REOPEN); + if (open_table(m_thd, m_failed_table, &ot_ctx)) + { + result= true; + break; + } + TABLE *table= m_failed_table->table; + table->reginfo.lock_type= m_thd->update_lock_default; + m_thd->in_lock_tables= 1; + lock= mysql_lock_tables(m_thd, &table, 1, + MYSQL_OPEN_REOPEN | MYSQL_LOCK_USE_MALLOC); + m_thd->in_lock_tables= 0; + if (lock == NULL || + !(merged_lock= mysql_lock_merge(m_thd->lock, lock, m_thd))) + { + result= true; + break; + } + m_thd->lock= merged_lock; + } + break; + } case OT_BACKOFF_AND_RETRY: case OT_REOPEN_TABLES: case OT_NO_ACTION: @@ -4203,6 +4444,7 @@ bool open_tables(THD *thd, const DDL_options_st &options, } thd->current_tablenr= 0; + restart: /* Close HANDLER tables which are marked for flush or against which there @@ -4283,6 +4525,9 @@ restart: /* For every table in the list of tables to open, try to find or open a table. + + NOTE: there can be duplicates in the list. F.ex. table specified in + LOCK TABLES and prelocked via another table (like when used in a trigger). */ for (tables= *table_to_open; tables; table_to_open= &tables->next_global, tables= tables->next_global) @@ -4377,6 +4622,8 @@ restart: { if (ot_ctx.can_recover_from_failed_open()) { + // FIXME: is this really used? 
+ DBUG_ASSERT(0); close_tables_for_reopen(thd, start, ot_ctx.start_of_statement_svp()); if (ot_ctx.recover_from_failed_open()) @@ -5149,16 +5396,14 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type, if (table_list->table) DBUG_RETURN(table_list->table); - /* should not be used in a prelocked_mode context, see NOTE above */ - DBUG_ASSERT(thd->locked_tables_mode < LTM_PRELOCKED); - THD_STAGE_INFO(thd, stage_opening_tables); thd->current_tablenr= 0; /* open_ltable can be used only for BASIC TABLEs */ table_list->required_type= TABLE_TYPE_NORMAL; /* This function can't properly handle requests for such metadata locks. */ - DBUG_ASSERT(table_list->mdl_request.type < MDL_SHARED_UPGRADABLE); + DBUG_ASSERT(lock_flags & MYSQL_OPEN_HAS_MDL_LOCK || + table_list->mdl_request.type < MDL_SHARED_UPGRADABLE); while ((error= open_table(thd, table_list, &ot_ctx)) && ot_ctx.can_recover_from_failed_open()) diff --git a/sql/sql_base.h b/sql/sql_base.h index 5b449fdddac..a98ec297c78 100644 --- a/sql/sql_base.h +++ b/sql/sql_base.h @@ -532,7 +532,8 @@ public: OT_BACKOFF_AND_RETRY, OT_REOPEN_TABLES, OT_DISCOVER, - OT_REPAIR + OT_REPAIR, + OT_ADD_HISTORY_PARTITION }; Open_table_context(THD *thd, uint flags); @@ -605,6 +606,9 @@ private: protection against global read lock. */ mdl_bitmap_t m_has_protection_against_grl; + +public: + uint vers_create_count; }; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 29824301e9d..16bbc48d73b 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -844,7 +844,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */ #endif /* Call to init() below requires fully initialized Open_tables_state. 
*/ - reset_open_tables_state(this); + reset_open_tables_state(); init(); debug_sync_init_thread(this); @@ -4535,7 +4535,7 @@ void THD::reset_n_backup_open_tables_state(Open_tables_backup *backup) DBUG_ENTER("reset_n_backup_open_tables_state"); backup->set_open_tables_state(this); backup->mdl_system_tables_svp= mdl_context.mdl_savepoint(); - reset_open_tables_state(this); + reset_open_tables_state(); state_flags|= Open_tables_state::BACKUPS_AVAIL; DBUG_VOID_RETURN; } diff --git a/sql/sql_class.h b/sql/sql_class.h index df9d89b5aff..686e6e70766 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1596,6 +1596,10 @@ enum enum_locked_tables_mode LTM_NONE= 0, LTM_LOCK_TABLES, LTM_PRELOCKED, + /* + TODO: remove LTM_PRELOCKED_UNDER_LOCK_TABLES: it is never used apart from + LTM_LOCK_TABLES. + */ LTM_PRELOCKED_UNDER_LOCK_TABLES, LTM_always_last }; @@ -1778,7 +1782,7 @@ public: *this= *state; } - void reset_open_tables_state(THD *thd) + void reset_open_tables_state() { open_tables= 0; temporary_tables= 0; @@ -2126,7 +2130,7 @@ public: bool restore_lock(THD *thd, TABLE_LIST *dst_table_list, TABLE *table, MYSQL_LOCK *lock); void add_back_last_deleted_lock(TABLE_LIST *dst_table_list); - void mark_table_for_reopen(THD *thd, TABLE *table); + void mark_table_for_reopen(TABLE *table); }; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index b5f8cf4a886..e2b6909bc5d 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -9666,7 +9666,11 @@ bool LEX::part_values_history(THD *thd) } else { - part_info->vers_init_info(thd); + if (unlikely(part_info->vers_init_info(thd))) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } elem->id= UINT_MAX32; } DBUG_ASSERT(part_info->vers_info); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index b9d3eec5a60..078ea0dae9b 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -84,6 +84,7 @@ #include "events.h" #include "sql_trigger.h" #include "transaction.h" +#include "sql_alter.h" #include "sql_audit.h" #include "sql_prepare.h" #include 
"sql_cte.h" @@ -6054,7 +6055,6 @@ finish: /* Free tables. Set stage 'closing tables' */ close_thread_tables(thd); - #ifndef DBUG_OFF if (lex->sql_command != SQLCOM_SET_OPTION && ! thd->in_sub_stmt) DEBUG_SYNC(thd, "execute_command_after_close_tables"); @@ -7566,6 +7566,7 @@ void THD::reset_for_next_command(bool do_clear_error) global_system_variables.auto_increment_increment; } #endif /* WITH_WSREP */ + query_start_sec_part_used= 0; is_fatal_error= time_zone_used= 0; log_current_statement= 0; diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index b75a318ab65..2e23662349f 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -2580,11 +2580,17 @@ char *generate_partition_syntax(THD *thd, partition_info *part_info, err+= str.append('\''); } } - if (vers_info->limit) + else if (vers_info->limit) { err+= str.append(STRING_WITH_LEN("LIMIT ")); err+= str.append_ulonglong(vers_info->limit); } + if (vers_info->auto_hist) + { + DBUG_ASSERT(vers_info->interval.is_set() || + vers_info->limit); + err+= str.append(STRING_WITH_LEN(" AUTO")); + } } else if (part_info->part_expr) { @@ -5326,7 +5332,9 @@ that are reorganised. 
now_part= el; } } - if (*fast_alter_table && tab_part_info->vers_info->interval.is_set()) + if (*fast_alter_table && + !(alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST) && + tab_part_info->vers_info->interval.is_set()) { partition_element *hist_part= tab_part_info->vers_info->hist_part; if (hist_part->range_value <= thd->query_start()) @@ -6038,7 +6046,7 @@ err: records are added */ -static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) +static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, bool copy_data) { char path[FN_REFLEN+1]; int error; @@ -6048,7 +6056,7 @@ static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) build_table_filename(path, sizeof(path) - 1, lpt->db.str, lpt->table_name.str, "", 0); - if(mysql_trans_prepare_alter_copy_data(thd)) + if(copy_data && mysql_trans_prepare_alter_copy_data(thd)) DBUG_RETURN(TRUE); /* TODO: test if bulk_insert would increase the performance */ @@ -6062,7 +6070,9 @@ static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt) file->print_error(error, MYF(error != ER_OUTOFMEMORY ? 
0 : ME_FATAL)); } - if (mysql_trans_commit_alter_copy_data(thd)) + DBUG_ASSERT(copy_data || (!lpt->copied && !lpt->deleted)); + + if (copy_data && mysql_trans_commit_alter_copy_data(thd)) error= 1; /* The error has been reported */ DBUG_RETURN(MY_TEST(error)); @@ -7341,7 +7351,8 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, thd->variables.option_bits|= OPTION_IF_EXISTS; if (table->file->alter_table_flags(alter_info->flags) & - HA_PARTITION_ONE_PHASE) + HA_PARTITION_ONE_PHASE && + !(alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST)) { /* In the case where the engine supports one phase online partition @@ -7383,7 +7394,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, 2) Perform the change within the handler */ if (mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || - mysql_change_partitions(lpt)) + mysql_change_partitions(lpt, true)) { goto err; } @@ -7570,9 +7581,14 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, ERROR_INJECT("convert_partition_11")) goto err; } + /* + TODO: would be good if adding new empty VERSIONING partitions would always + go this way, auto or not. 
+ */ else if ((alter_info->partition_flags & ALTER_PARTITION_ADD) && (part_info->part_type == RANGE_PARTITION || - part_info->part_type == LIST_PARTITION)) + part_info->part_type == LIST_PARTITION || + alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST)) { DBUG_ASSERT(!(alter_info->partition_flags & ALTER_PARTITION_CONVERT_IN)); /* @@ -7613,7 +7629,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, ERROR_INJECT("add_partition_3") || write_log_add_change_partition(lpt) || ERROR_INJECT("add_partition_4") || - mysql_change_partitions(lpt) || + mysql_change_partitions(lpt, false) || ERROR_INJECT("add_partition_5") || alter_close_table(lpt) || ERROR_INJECT("add_partition_6") || @@ -7700,7 +7716,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, ERROR_INJECT("change_partition_2") || write_log_add_change_partition(lpt) || ERROR_INJECT("change_partition_3") || - mysql_change_partitions(lpt) || + mysql_change_partitions(lpt, true) || ERROR_INJECT("change_partition_4") || wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED) || ERROR_INJECT("change_partition_5") || diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index c6af72c5979..a0ef89ff0f5 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -511,7 +511,7 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) (HTON_REQUIRES_CLOSE_AFTER_TRUNCATE | HTON_TRUNCATE_REQUIRES_EXCLUSIVE_USE))) { - thd->locked_tables_list.mark_table_for_reopen(thd, table_ref->table); + thd->locked_tables_list.mark_table_for_reopen(table_ref->table); if (unlikely(thd->locked_tables_list.reopen_tables(thd, false))) thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 442644eddd6..f9c575d39c6 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1506,6 +1506,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); condition_number opt_versioning_interval_start +%type <num> opt_vers_auto_part + %type 
<item_param> param_marker %type <item_num> @@ -5093,24 +5095,20 @@ opt_part_option: opt_versioning_rotation: /* empty */ {} - | INTERVAL_SYM expr interval opt_versioning_interval_start + | INTERVAL_SYM expr interval opt_versioning_interval_start opt_vers_auto_part { partition_info *part_info= Lex->part_info; const char *table_name= Lex->create_last_non_select_table->table_name.str; - if (unlikely(part_info->vers_set_interval(thd, $2, $3, $4, table_name))) + if (unlikely(part_info->vers_set_interval(thd, $2, $3, $4, $5, table_name))) MYSQL_YYABORT; } - | LIMIT ulonglong_num - { - partition_info *part_info= Lex->part_info; - if (unlikely(part_info->vers_set_limit($2))) + | LIMIT ulonglong_num opt_vers_auto_part { - my_error(ER_PART_WRONG_VALUE, MYF(0), - Lex->create_last_non_select_table->table_name.str, - "LIMIT"); - MYSQL_YYABORT; + partition_info *part_info= Lex->part_info; + const char *table_name= Lex->create_last_non_select_table->table_name.str; + if (unlikely(part_info->vers_set_limit($2, $3, table_name))) + MYSQL_YYABORT; } - } ; @@ -5125,6 +5123,16 @@ opt_versioning_interval_start: } ; +opt_vers_auto_part: + /* empty */ + { + $$= 0; + } + | AUTO_SYM + { + $$= 1; + } + ; /* End of partition parser part */ diff --git a/sql/table.cc b/sql/table.cc index b1a7b6bfe2b..38d48100359 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -10123,5 +10123,5 @@ void TABLE::mark_table_for_reopen() { THD *thd= in_use; DBUG_ASSERT(thd); - thd->locked_tables_list.mark_table_for_reopen(thd, this); + thd->locked_tables_list.mark_table_for_reopen(this); } diff --git a/sql/table.h b/sql/table.h index 6aa75df39c6..c8ea4094409 100644 --- a/sql/table.h +++ b/sql/table.h @@ -64,6 +64,7 @@ class derived_handler; class Pushdown_derived; struct Name_resolution_context; class Table_function_json_table; +class Open_table_context; /* Used to identify NESTED_JOIN structures within a join (applicable only to @@ -910,6 +911,13 @@ struct TABLE_SHARE vers_kind_t versioned; period_info_t vers; 
period_info_t period; + /* + Prevent multiple threads from repeating partition auto-creation over + a single share. + + TODO: remove it once partitioning metadata is stored in TABLE_SHARE. + */ + bool vers_skip_auto_create; bool init_period_from_extra2(period_info_t *period, const uchar *data, const uchar *end); @@ -1766,6 +1774,10 @@ public: ulonglong vers_start_id() const; ulonglong vers_end_id() const; +#ifdef WITH_PARTITION_STORAGE_ENGINE + bool vers_switch_partition(THD *thd, TABLE_LIST *table_list, + Open_table_context *ot_ctx); +#endif int update_generated_fields(); int period_make_insert(Item *src, Field *dst); @@ -2558,6 +2570,13 @@ struct TABLE_LIST bool merged; bool merged_for_insert; bool sequence; /* Part of NEXTVAL/CURVAL/LASTVAL */ + /* + Prevent a single thread from repeating partition auto-creation over + multiple share instances (as the share is closed on backoff action). + + Skips auto-creation only for one given query id. + */ + query_id_t vers_skip_create; /* Items created by create_view_field and collected to change them in case diff --git a/sql/transaction.cc b/sql/transaction.cc index 958abebfc47..b1e98be56bc 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -463,7 +463,7 @@ bool trans_commit_stmt(THD *thd) a savepoint for each nested statement, and release the savepoint when statement has succeeded. */ - DBUG_ASSERT(! thd->in_sub_stmt); + DBUG_ASSERT(!(thd->in_sub_stmt)); thd->merge_unsafe_rollback_flags(); |
