diff options
author | Sergei Golubchik <serg@mariadb.org> | 2018-02-21 15:16:19 +0100 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2018-02-23 19:17:48 +0100 |
commit | e36c5ec0a50332840c7dcb8e6b08a369ec2a829c (patch) | |
tree | af84bd00042773cabddb4150748601c53d78b783 /sql/partition_info.cc | |
parent | 7961bc4b890071f281da88845489cdddc54c289b (diff) | |
download | mariadb-git-e36c5ec0a50332840c7dcb8e6b08a369ec2a829c.tar.gz |
PARTITION BY SYSTEM_TIME INTERVAL ...
Lots of changes:
* calculate the current history partition in ::external_lock(),
not in ::write_row() or ::update_row()
* remove dynamically collected per-partition row_end stats
* no full table scan in open_table_from_share to calculate these
stats, no manual MDL/thr_locks in open_table_from_share
* no shared stats in TABLE_SHARE = no mutexes or condition waits when
calculating current history partition
* always compare timestamps, don't convert them to MYSQL_TIME
(avoid DST ambiguity, and it's faster too)
* correct interval handling, 1 month = 1 month, not 30 * 24 * 3600 seconds
* save/restore first partition start time, and count intervals from there
* if INTERVAL is set, only allow dropping the first partitions
* when adding new history partitions, split the data in the last history
partition, if it has overflowed
* show partition boundaries in INFORMATION_SCHEMA.PARTITIONS
Diffstat (limited to 'sql/partition_info.cc')
-rw-r--r-- | sql/partition_info.cc | 510 |
1 files changed, 61 insertions, 449 deletions
diff --git a/sql/partition_info.cc b/sql/partition_info.cc index 2bef5bbb25b..e6ad7c9b008 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -871,485 +871,97 @@ bool partition_info::vers_init_info(THD * thd) return false; } -bool partition_info::vers_set_interval(const INTERVAL & i) +void partition_info::vers_set_hist_part(THD *thd) { - if (i.neg || i.second_part) - return true; - - DBUG_ASSERT(vers_info); - - // TODO: INTERVAL conversion to seconds leads to mismatch with calendar intervals (MONTH and YEAR) - vers_info->interval= static_cast<my_time_t>( - i.second + - i.minute * 60 + - i.hour * 60 * 60 + - i.day * 24 * 60 * 60 + - i.month * 30 * 24 * 60 * 60 + - i.year * 365 * 30 * 24 * 60 * 60); - - if (vers_info->interval == 0) - return true; - - return false; -} - -bool partition_info::vers_set_limit(ulonglong limit) -{ - if (limit < 1) - return true; - - DBUG_ASSERT(vers_info); - - vers_info->limit= limit; - return false; -} - -partition_element* -partition_info::vers_part_rotate(THD * thd) -{ - DBUG_ASSERT(table && table->s); - DBUG_ASSERT(vers_info && vers_info->initialized()); - - if (table->s->hist_part_id >= vers_info->now_part->id - 1) - { - DBUG_ASSERT(table->s->hist_part_id == vers_info->now_part->id - 1); - push_warning_printf(thd, - thd->lex->sql_command == SQLCOM_ALTER_TABLE ? 
- Sql_condition::WARN_LEVEL_NOTE : - Sql_condition::WARN_LEVEL_WARN, - WARN_VERS_PART_FULL, - ER_THD(thd, WARN_VERS_PART_FULL), - table->s->db.str, table->s->error_table_name(), - vers_info->hist_part->partition_name); - return vers_info->hist_part; - } - - table->s->hist_part_id++; - const char* old_part_name= vers_info->hist_part->partition_name; - vers_hist_part(); - - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_NOTE, - WARN_VERS_PART_ROTATION, - ER_THD(thd, WARN_VERS_PART_ROTATION), - table->s->db.str, table->s->error_table_name(), - old_part_name, - vers_info->hist_part->partition_name); - - return vers_info->hist_part; -} - -bool partition_info::vers_set_expression(THD *thd, partition_element *el, MYSQL_TIME& t) -{ - curr_part_elem= el; - init_column_part(thd); - el->list_val_list.empty(); - el->list_val_list.push_back(curr_list_val, thd->mem_root); - for (uint i= 0; i < num_columns; ++i) - { - part_column_list_val *col_val= add_column_value(thd); - if (el->type() == partition_element::CURRENT) - { - col_val->max_value= true; - col_val->item_expression= NULL; - col_val->column_value= NULL; - col_val->part_info= this; - col_val->fixed= 1; - continue; - } - Item *item_expression= new (thd->mem_root) - Item_datetime_literal(thd, &t, 0); - if (!item_expression) - return true; - /* We initialize col_val with bogus max value to make fix_partition_func() and check_range_constants() happy. - Later in vers_setup_stats() it is initialized with real stat value if there will be any. */ - /* FIXME: TIME_RESULT in col_val is expensive. It should be INT_RESULT - (got to be fixed when InnoDB is supported). 
*/ - init_col_val(col_val, item_expression); - DBUG_ASSERT(item_expression == el->get_col_val(i).item_expression); - } // for (num_columns) - return false; -} - -bool partition_info::vers_setup_expression(THD * thd, uint32 alter_add) -{ - DBUG_ASSERT(part_type == VERSIONING_PARTITION); - - if (!table->versioned(VERS_TIMESTAMP)) - { - my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->s->table_name.str); - return true; - } - - if (alter_add) + if (vers_info->limit) { - DBUG_ASSERT(partitions.elements > alter_add + 1); - Vers_min_max_stats** old_array= table->s->stat_trx; - table->s->stat_trx= static_cast<Vers_min_max_stats**>( - alloc_root(&table->s->mem_root, sizeof(void *) * (partitions.elements * num_columns + 1))); - memcpy(table->s->stat_trx, old_array, sizeof(void *) * (partitions.elements - alter_add) * num_columns); - table->s->stat_trx[partitions.elements * num_columns]= NULL; - } - else - { - /* Prepare part_field_list */ - Field *row_end= table->vers_end_field(); - part_field_list.push_back(row_end->field_name.str, thd->mem_root); - DBUG_ASSERT(part_field_list.elements == num_columns); - // needed in handle_list_of_fields() - row_end->flags|= GET_FIXED_FIELDS_FLAG; - } - - List_iterator<partition_element> it(partitions); - partition_element *el; - MYSQL_TIME t; - memset(&t, 0, sizeof(t)); - my_time_t ts= TIMESTAMP_MAX_VALUE - partitions.elements; - uint32 id= 0; - while ((el= it++)) - { - DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL); - ++ts; - if (alter_add) - { - /* Non-empty historical partitions are left as is. */ - if (el->type() == partition_element::HISTORY && !el->empty) - { - ++id; - continue; - } - /* Newly added element is inserted before AS_OF_NOW. 
*/ - if (el->id == UINT_MAX32 || el->type() == partition_element::CURRENT) - { - DBUG_ASSERT(table && table->s); - Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root) - Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s); - table->s->stat_trx[id * num_columns + STAT_TRX_END]= stat_trx_end; - el->id= id++; - if (el->type() == partition_element::CURRENT) - break; - goto set_expression; - } - /* Existing element expression is recalculated. */ - thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); - for (uint i= 0; i < num_columns; ++i) - { - part_column_list_val &col_val= el->get_col_val(i); - static_cast<Item_datetime_literal *>(col_val.item_expression)->set_time(&t); - col_val.fixed= 0; - } - ++id; - continue; - } - - set_expression: - thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); - if (vers_set_expression(thd, el, t)) - return true; - } - return false; -} - - -class Table_locker -{ - THD *thd; - TABLE &table; - thr_lock_type saved_type; - MYSQL_LOCK *saved_lock; - enum_locked_tables_mode saved_mode; - TABLE_LIST **saved_query_tables_own_last; - TABLE_LIST table_list; - bool locked; - -public: - Table_locker(THD *_thd, TABLE &_table, thr_lock_type lock_type) : - thd(_thd), - table(_table), - saved_type(table.reginfo.lock_type), - saved_lock(_thd->lock), - saved_mode(_thd->locked_tables_mode), - saved_query_tables_own_last(_thd->lex->query_tables_own_last), - table_list(&_table, lock_type), - locked(false) - { - table.reginfo.lock_type= lock_type; - } - bool lock() - { - DBUG_ASSERT(table.file); - // FIXME: check consistency with table.reginfo.lock_type - if (table.file->get_lock_type() != F_UNLCK - || table.s->tmp_table) + ha_partition *hp= (ha_partition*)(table->file); + partition_element *next= NULL; + List_iterator<partition_element> it(partitions); + while (next != vers_info->hist_part) + next= it++; + ha_rows records= hp->part_records(next); + while ((next= it++) != vers_info->now_part) { - return false; + ha_rows next_records= 
hp->part_records(next); + if (next_records == 0) + break; + vers_info->hist_part= next; + records= next_records; } - thd->lock= NULL; - thd->locked_tables_mode= LTM_NONE; - thd->lex->query_tables_own_last= NULL; - bool res= lock_tables(thd, &table_list, 1, 0); - locked= !res; - return res; - } - ~Table_locker() - { - if (locked) - mysql_unlock_tables(thd, thd->lock); - table.reginfo.lock_type= saved_type; - thd->lock= saved_lock; - thd->locked_tables_mode= saved_mode; - thd->lex->query_tables_own_last= saved_query_tables_own_last; - if (locked && !thd->in_sub_stmt) + if (records > vers_info->limit) { - ha_commit_trans(thd, false); - ha_commit_trans(thd, true); + if (next == vers_info->now_part) + goto warn; + vers_info->hist_part= next; } - } -}; - - -// scan table for min/max row_end -inline -bool partition_info::vers_scan_min_max(THD *thd, partition_element *part) -{ - uint32 sub_factor= num_subparts ? num_subparts : 1; - uint32 part_id= part->id * sub_factor; - uint32 part_id_end= part_id + sub_factor; - DBUG_ASSERT(part->empty); - DBUG_ASSERT(part->type() == partition_element::HISTORY); - DBUG_ASSERT(table->s->stat_trx); - - Table_locker l(thd, *table, TL_READ); - if (l.lock()) - { - my_error(ER_INTERNAL_ERROR, MYF(0), "min/max scan failed on lock_tables()"); - return true; + return; } - for (; part_id < part_id_end; ++part_id) + if (vers_info->interval.is_set()) { - handler *file= table->file->part_handler(part_id); // requires update_partition() for ha_innopart - DBUG_ASSERT(file); + if (vers_info->hist_part->range_value > thd->system_time) + return; - table->default_column_bitmaps(); - bitmap_set_bit(table->read_set, table->vers_end_field()->field_index); - file->column_bitmaps_signal(); + partition_element *next= NULL; + List_iterator<partition_element> it(partitions); + while (next != vers_info->hist_part) + next= it++; - int rc= file->ha_rnd_init(true); - if (!rc) + while ((next= it++) != vers_info->now_part) { - while ((rc= 
file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) - { - if (part->empty) - part->empty= false; - if (thd->killed) - { - file->ha_rnd_end(); - file->update_partition(part_id); - ha_commit_trans(thd, false); - return true; - } - if (rc) - { - if (rc == HA_ERR_RECORD_DELETED) - continue; - break; - } - if (table->vers_end_field()->is_max()) - { - rc= HA_ERR_INTERNAL_ERROR; - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_WARN, - WARN_VERS_PART_NON_HISTORICAL, - ER_THD(thd, WARN_VERS_PART_NON_HISTORICAL), - part->partition_name); - break; - } - if (table->versioned(VERS_TRX_ID)) - { - uchar buf[8]; - Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); - if (!vers_trx_id_to_ts(thd, table->vers_end_field(), fld)) - { - vers_stat_trx(STAT_TRX_END, part).update_unguarded(&fld); - } - } - else - { - vers_stat_trx(STAT_TRX_END, part).update_unguarded(table->vers_end_field()); - } - } - file->ha_rnd_end(); - } - file->update_partition(part_id); - if (rc != HA_ERR_END_OF_FILE) - { - // TODO: print rc code - my_error(ER_INTERNAL_ERROR, MYF(0), "min/max scan failed in versioned partitions setup (see warnings)"); - return true; + vers_info->hist_part= next; + if (next->range_value > thd->system_time) + return; } + goto warn; } - return false; + return; +warn: + my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG), + table->s->db.str, table->s->error_table_name(), + vers_info->hist_part->partition_name); } -void partition_info::vers_update_col_vals(THD *thd, partition_element *el0, partition_element *el1) + +bool partition_info::vers_setup_expression(THD * thd, uint32 alter_add) { - MYSQL_TIME t; - memset(&t, 0, sizeof(t)); - DBUG_ASSERT(table && table->s && table->s->stat_trx); - DBUG_ASSERT(!el0 || el1->id == el0->id + 1); - const uint idx= el1->id * num_columns; - my_time_t ts; - part_column_list_val *col_val; - Item_datetime_literal *val_item; - Vers_min_max_stats *stat_trx_x; - for (uint i= 0; i < num_columns; 
++i) + if (!table->versioned()) { - stat_trx_x= table->s->stat_trx[idx + i]; - if (el0) - { - ts= stat_trx_x->min_time(); - thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); - col_val= &el0->get_col_val(i); - val_item= static_cast<Item_datetime_literal*>(col_val->item_expression); - DBUG_ASSERT(val_item); - if (*val_item > t) - { - val_item->set_time(&t); - col_val->fixed= 0; - } - } - col_val= &el1->get_col_val(i); - if (!col_val->max_value) - { - ts= stat_trx_x->max_time() + 1; - thd->variables.time_zone->gmt_sec_to_TIME(&t, ts); - val_item= static_cast<Item_datetime_literal*>(col_val->item_expression); - DBUG_ASSERT(val_item); - if (*val_item < t) - { - val_item->set_time(&t); - col_val->fixed= 0; - } - } + my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->s->table_name.str); + return true; } -} - -// setup at open() phase (TABLE_SHARE is initialized) -bool partition_info::vers_setup_stats(THD * thd, bool is_create_table_ind) -{ DBUG_ASSERT(part_type == VERSIONING_PARTITION); - DBUG_ASSERT(vers_info && vers_info->initialized(false)); - DBUG_ASSERT(table && table->s); - - bool error= false; + DBUG_ASSERT(table->versioned(VERS_TIMESTAMP)); + DBUG_ASSERT(num_columns == 1); - TABLE_LIST tl(table, TL_READ); - MDL_auto_lock mdl_lock(thd, tl); - if (mdl_lock.acquire_error()) - return true; - - mysql_mutex_lock(&table->s->LOCK_rotation); - if (table->s->busy_rotation) + if (!alter_add) { - table->s->vers_wait_rotation(); - vers_hist_part(); + Field *row_end= table->vers_end_field(); + part_field_list.push_back(row_end->field_name.str, thd->mem_root); + DBUG_ASSERT(part_field_list.elements == 1); + // needed in handle_list_of_fields() + row_end->flags|= GET_FIXED_FIELDS_FLAG; } - else - { - table->s->busy_rotation= true; - mysql_mutex_unlock(&table->s->LOCK_rotation); - DBUG_ASSERT(part_field_list.elements == num_columns); - - bool dont_stat= true; - bool col_val_updated= false; - // initialize stat_trx - if (!table->s->stat_trx) - { - DBUG_ASSERT(partitions.elements 
> 1); - table->s->stat_trx= static_cast<Vers_min_max_stats**>( - alloc_root(&table->s->mem_root, sizeof(void *) * (partitions.elements * num_columns + 1))); - table->s->stat_trx[partitions.elements * num_columns]= NULL; - dont_stat= false; - } - - // build freelist, scan min/max, assign hist_part + if (alter_add) + { List_iterator<partition_element> it(partitions); - partition_element *el= NULL, *prev; - while ((prev= el, el= it++)) + partition_element *el; + for(uint32 id= 0; ((el= it++)); id++) { - if (el->type() == partition_element::HISTORY && dont_stat) - { - if (el->id == table->s->hist_part_id) - { - vers_info->hist_part= el; - break; - } - continue; - } - - { - Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root) - Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s); - table->s->stat_trx[el->id * num_columns + STAT_TRX_END]= stat_trx_end; - } - - if (!is_create_table_ind) + DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL); + /* Newly added element is inserted before AS_OF_NOW. 
*/ + if (el->id == UINT_MAX32 || el->type() == partition_element::CURRENT) { + el->id= id; if (el->type() == partition_element::CURRENT) - { - uchar buf[8]; - Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6); - fld.set_max(); - vers_stat_trx(STAT_TRX_END, el).update_unguarded(&fld); - el->empty= false; - } - else if (vers_scan_min_max(thd, el)) - { - table->s->stat_trx= NULL; // may be a leak on endless table open - error= true; break; - } - if (!el->empty) - { - vers_update_col_vals(thd, prev, el); - col_val_updated= true; - } - } - - if (el->type() == partition_element::CURRENT) - break; - - DBUG_ASSERT(el->type() == partition_element::HISTORY); - - if (vers_info->hist_part) - { - if (!el->empty) - goto set_hist_part; - } - else - { - set_hist_part: - vers_info->hist_part= el; - continue; } - } // while - - if (!error && !dont_stat) - { - if (col_val_updated) - table->s->stat_serial++; - - table->s->hist_part_id= vers_info->hist_part->id; } - mysql_mutex_lock(&table->s->LOCK_rotation); - mysql_cond_broadcast(&table->s->COND_rotation); - table->s->busy_rotation= false; } - mysql_mutex_unlock(&table->s->LOCK_rotation); - return error; + return false; } @@ -1682,11 +1294,11 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, DBUG_ASSERT(vers_info); if (num_parts < 2 || !vers_info->now_part) { - DBUG_ASSERT(info && info->alias.str); + DBUG_ASSERT(info); + DBUG_ASSERT(info->alias.str); my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias.str); goto end; } - DBUG_ASSERT(vers_info->initialized(false)); DBUG_ASSERT(num_parts == partitions.elements); } i= 0; @@ -1800,7 +1412,7 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type, if (hist_parts > 1) { - if (unlikely(vers_info->limit == 0 && vers_info->interval == 0)) + if (vers_info->limit == 0 && !vers_info->interval.is_set()) { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, |