summaryrefslogtreecommitdiff
path: root/sql/partition_info.cc
diff options
context:
space:
mode:
authorSergei Golubchik <serg@mariadb.org>2018-02-21 15:16:19 +0100
committerSergei Golubchik <serg@mariadb.org>2018-02-23 19:17:48 +0100
commite36c5ec0a50332840c7dcb8e6b08a369ec2a829c (patch)
treeaf84bd00042773cabddb4150748601c53d78b783 /sql/partition_info.cc
parent7961bc4b890071f281da88845489cdddc54c289b (diff)
downloadmariadb-git-e36c5ec0a50332840c7dcb8e6b08a369ec2a829c.tar.gz
PARTITION BY SYSTEM_TIME INTERVAL ...
Lots of changes: * calculate the current history partition in ::external_lock(), not in ::write_row() or ::update_row() * remove dynamically collected per-partition row_end stats * no full table scan in open_table_from_share to calculate these stats, no manual MDL/thr_locks in open_table_from_share * no shared stats in TABLE_SHARE = no mutexes or condition waits when calculating current history partition * always compare timestamps, don't convert them to MYSQL_TIME (avoid DST ambiguity, and it's faster too) * correct interval handling, 1 month = 1 month, not 30 * 24 * 3600 seconds * save/restore first partition start time, and count intervals from there * only allow to drop first partitions if INTERVAL * when adding new history partitions, split the data in the last history parition, if it was overflowed * show partition boundaries in INFORMATION_SCHEMA.PARTITIONS
Diffstat (limited to 'sql/partition_info.cc')
-rw-r--r--sql/partition_info.cc510
1 files changed, 61 insertions, 449 deletions
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index 2bef5bbb25b..e6ad7c9b008 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -871,485 +871,97 @@ bool partition_info::vers_init_info(THD * thd)
return false;
}
-bool partition_info::vers_set_interval(const INTERVAL & i)
+void partition_info::vers_set_hist_part(THD *thd)
{
- if (i.neg || i.second_part)
- return true;
-
- DBUG_ASSERT(vers_info);
-
- // TODO: INTERVAL conversion to seconds leads to mismatch with calendar intervals (MONTH and YEAR)
- vers_info->interval= static_cast<my_time_t>(
- i.second +
- i.minute * 60 +
- i.hour * 60 * 60 +
- i.day * 24 * 60 * 60 +
- i.month * 30 * 24 * 60 * 60 +
- i.year * 365 * 30 * 24 * 60 * 60);
-
- if (vers_info->interval == 0)
- return true;
-
- return false;
-}
-
-bool partition_info::vers_set_limit(ulonglong limit)
-{
- if (limit < 1)
- return true;
-
- DBUG_ASSERT(vers_info);
-
- vers_info->limit= limit;
- return false;
-}
-
-partition_element*
-partition_info::vers_part_rotate(THD * thd)
-{
- DBUG_ASSERT(table && table->s);
- DBUG_ASSERT(vers_info && vers_info->initialized());
-
- if (table->s->hist_part_id >= vers_info->now_part->id - 1)
- {
- DBUG_ASSERT(table->s->hist_part_id == vers_info->now_part->id - 1);
- push_warning_printf(thd,
- thd->lex->sql_command == SQLCOM_ALTER_TABLE ?
- Sql_condition::WARN_LEVEL_NOTE :
- Sql_condition::WARN_LEVEL_WARN,
- WARN_VERS_PART_FULL,
- ER_THD(thd, WARN_VERS_PART_FULL),
- table->s->db.str, table->s->error_table_name(),
- vers_info->hist_part->partition_name);
- return vers_info->hist_part;
- }
-
- table->s->hist_part_id++;
- const char* old_part_name= vers_info->hist_part->partition_name;
- vers_hist_part();
-
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_NOTE,
- WARN_VERS_PART_ROTATION,
- ER_THD(thd, WARN_VERS_PART_ROTATION),
- table->s->db.str, table->s->error_table_name(),
- old_part_name,
- vers_info->hist_part->partition_name);
-
- return vers_info->hist_part;
-}
-
-bool partition_info::vers_set_expression(THD *thd, partition_element *el, MYSQL_TIME& t)
-{
- curr_part_elem= el;
- init_column_part(thd);
- el->list_val_list.empty();
- el->list_val_list.push_back(curr_list_val, thd->mem_root);
- for (uint i= 0; i < num_columns; ++i)
- {
- part_column_list_val *col_val= add_column_value(thd);
- if (el->type() == partition_element::CURRENT)
- {
- col_val->max_value= true;
- col_val->item_expression= NULL;
- col_val->column_value= NULL;
- col_val->part_info= this;
- col_val->fixed= 1;
- continue;
- }
- Item *item_expression= new (thd->mem_root)
- Item_datetime_literal(thd, &t, 0);
- if (!item_expression)
- return true;
- /* We initialize col_val with bogus max value to make fix_partition_func() and check_range_constants() happy.
- Later in vers_setup_stats() it is initialized with real stat value if there will be any. */
- /* FIXME: TIME_RESULT in col_val is expensive. It should be INT_RESULT
- (got to be fixed when InnoDB is supported). */
- init_col_val(col_val, item_expression);
- DBUG_ASSERT(item_expression == el->get_col_val(i).item_expression);
- } // for (num_columns)
- return false;
-}
-
-bool partition_info::vers_setup_expression(THD * thd, uint32 alter_add)
-{
- DBUG_ASSERT(part_type == VERSIONING_PARTITION);
-
- if (!table->versioned(VERS_TIMESTAMP))
- {
- my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->s->table_name.str);
- return true;
- }
-
- if (alter_add)
+ if (vers_info->limit)
{
- DBUG_ASSERT(partitions.elements > alter_add + 1);
- Vers_min_max_stats** old_array= table->s->stat_trx;
- table->s->stat_trx= static_cast<Vers_min_max_stats**>(
- alloc_root(&table->s->mem_root, sizeof(void *) * (partitions.elements * num_columns + 1)));
- memcpy(table->s->stat_trx, old_array, sizeof(void *) * (partitions.elements - alter_add) * num_columns);
- table->s->stat_trx[partitions.elements * num_columns]= NULL;
- }
- else
- {
- /* Prepare part_field_list */
- Field *row_end= table->vers_end_field();
- part_field_list.push_back(row_end->field_name.str, thd->mem_root);
- DBUG_ASSERT(part_field_list.elements == num_columns);
- // needed in handle_list_of_fields()
- row_end->flags|= GET_FIXED_FIELDS_FLAG;
- }
-
- List_iterator<partition_element> it(partitions);
- partition_element *el;
- MYSQL_TIME t;
- memset(&t, 0, sizeof(t));
- my_time_t ts= TIMESTAMP_MAX_VALUE - partitions.elements;
- uint32 id= 0;
- while ((el= it++))
- {
- DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL);
- ++ts;
- if (alter_add)
- {
- /* Non-empty historical partitions are left as is. */
- if (el->type() == partition_element::HISTORY && !el->empty)
- {
- ++id;
- continue;
- }
- /* Newly added element is inserted before AS_OF_NOW. */
- if (el->id == UINT_MAX32 || el->type() == partition_element::CURRENT)
- {
- DBUG_ASSERT(table && table->s);
- Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root)
- Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s);
- table->s->stat_trx[id * num_columns + STAT_TRX_END]= stat_trx_end;
- el->id= id++;
- if (el->type() == partition_element::CURRENT)
- break;
- goto set_expression;
- }
- /* Existing element expression is recalculated. */
- thd->variables.time_zone->gmt_sec_to_TIME(&t, ts);
- for (uint i= 0; i < num_columns; ++i)
- {
- part_column_list_val &col_val= el->get_col_val(i);
- static_cast<Item_datetime_literal *>(col_val.item_expression)->set_time(&t);
- col_val.fixed= 0;
- }
- ++id;
- continue;
- }
-
- set_expression:
- thd->variables.time_zone->gmt_sec_to_TIME(&t, ts);
- if (vers_set_expression(thd, el, t))
- return true;
- }
- return false;
-}
-
-
-class Table_locker
-{
- THD *thd;
- TABLE &table;
- thr_lock_type saved_type;
- MYSQL_LOCK *saved_lock;
- enum_locked_tables_mode saved_mode;
- TABLE_LIST **saved_query_tables_own_last;
- TABLE_LIST table_list;
- bool locked;
-
-public:
- Table_locker(THD *_thd, TABLE &_table, thr_lock_type lock_type) :
- thd(_thd),
- table(_table),
- saved_type(table.reginfo.lock_type),
- saved_lock(_thd->lock),
- saved_mode(_thd->locked_tables_mode),
- saved_query_tables_own_last(_thd->lex->query_tables_own_last),
- table_list(&_table, lock_type),
- locked(false)
- {
- table.reginfo.lock_type= lock_type;
- }
- bool lock()
- {
- DBUG_ASSERT(table.file);
- // FIXME: check consistency with table.reginfo.lock_type
- if (table.file->get_lock_type() != F_UNLCK
- || table.s->tmp_table)
+ ha_partition *hp= (ha_partition*)(table->file);
+ partition_element *next= NULL;
+ List_iterator<partition_element> it(partitions);
+ while (next != vers_info->hist_part)
+ next= it++;
+ ha_rows records= hp->part_records(next);
+ while ((next= it++) != vers_info->now_part)
{
- return false;
+ ha_rows next_records= hp->part_records(next);
+ if (next_records == 0)
+ break;
+ vers_info->hist_part= next;
+ records= next_records;
}
- thd->lock= NULL;
- thd->locked_tables_mode= LTM_NONE;
- thd->lex->query_tables_own_last= NULL;
- bool res= lock_tables(thd, &table_list, 1, 0);
- locked= !res;
- return res;
- }
- ~Table_locker()
- {
- if (locked)
- mysql_unlock_tables(thd, thd->lock);
- table.reginfo.lock_type= saved_type;
- thd->lock= saved_lock;
- thd->locked_tables_mode= saved_mode;
- thd->lex->query_tables_own_last= saved_query_tables_own_last;
- if (locked && !thd->in_sub_stmt)
+ if (records > vers_info->limit)
{
- ha_commit_trans(thd, false);
- ha_commit_trans(thd, true);
+ if (next == vers_info->now_part)
+ goto warn;
+ vers_info->hist_part= next;
}
- }
-};
-
-
-// scan table for min/max row_end
-inline
-bool partition_info::vers_scan_min_max(THD *thd, partition_element *part)
-{
- uint32 sub_factor= num_subparts ? num_subparts : 1;
- uint32 part_id= part->id * sub_factor;
- uint32 part_id_end= part_id + sub_factor;
- DBUG_ASSERT(part->empty);
- DBUG_ASSERT(part->type() == partition_element::HISTORY);
- DBUG_ASSERT(table->s->stat_trx);
-
- Table_locker l(thd, *table, TL_READ);
- if (l.lock())
- {
- my_error(ER_INTERNAL_ERROR, MYF(0), "min/max scan failed on lock_tables()");
- return true;
+ return;
}
- for (; part_id < part_id_end; ++part_id)
+ if (vers_info->interval.is_set())
{
- handler *file= table->file->part_handler(part_id); // requires update_partition() for ha_innopart
- DBUG_ASSERT(file);
+ if (vers_info->hist_part->range_value > thd->system_time)
+ return;
- table->default_column_bitmaps();
- bitmap_set_bit(table->read_set, table->vers_end_field()->field_index);
- file->column_bitmaps_signal();
+ partition_element *next= NULL;
+ List_iterator<partition_element> it(partitions);
+ while (next != vers_info->hist_part)
+ next= it++;
- int rc= file->ha_rnd_init(true);
- if (!rc)
+ while ((next= it++) != vers_info->now_part)
{
- while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE)
- {
- if (part->empty)
- part->empty= false;
- if (thd->killed)
- {
- file->ha_rnd_end();
- file->update_partition(part_id);
- ha_commit_trans(thd, false);
- return true;
- }
- if (rc)
- {
- if (rc == HA_ERR_RECORD_DELETED)
- continue;
- break;
- }
- if (table->vers_end_field()->is_max())
- {
- rc= HA_ERR_INTERNAL_ERROR;
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- WARN_VERS_PART_NON_HISTORICAL,
- ER_THD(thd, WARN_VERS_PART_NON_HISTORICAL),
- part->partition_name);
- break;
- }
- if (table->versioned(VERS_TRX_ID))
- {
- uchar buf[8];
- Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6);
- if (!vers_trx_id_to_ts(thd, table->vers_end_field(), fld))
- {
- vers_stat_trx(STAT_TRX_END, part).update_unguarded(&fld);
- }
- }
- else
- {
- vers_stat_trx(STAT_TRX_END, part).update_unguarded(table->vers_end_field());
- }
- }
- file->ha_rnd_end();
- }
- file->update_partition(part_id);
- if (rc != HA_ERR_END_OF_FILE)
- {
- // TODO: print rc code
- my_error(ER_INTERNAL_ERROR, MYF(0), "min/max scan failed in versioned partitions setup (see warnings)");
- return true;
+ vers_info->hist_part= next;
+ if (next->range_value > thd->system_time)
+ return;
}
+ goto warn;
}
- return false;
+ return;
+warn:
+ my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG),
+ table->s->db.str, table->s->error_table_name(),
+ vers_info->hist_part->partition_name);
}
-void partition_info::vers_update_col_vals(THD *thd, partition_element *el0, partition_element *el1)
+
+bool partition_info::vers_setup_expression(THD * thd, uint32 alter_add)
{
- MYSQL_TIME t;
- memset(&t, 0, sizeof(t));
- DBUG_ASSERT(table && table->s && table->s->stat_trx);
- DBUG_ASSERT(!el0 || el1->id == el0->id + 1);
- const uint idx= el1->id * num_columns;
- my_time_t ts;
- part_column_list_val *col_val;
- Item_datetime_literal *val_item;
- Vers_min_max_stats *stat_trx_x;
- for (uint i= 0; i < num_columns; ++i)
+ if (!table->versioned())
{
- stat_trx_x= table->s->stat_trx[idx + i];
- if (el0)
- {
- ts= stat_trx_x->min_time();
- thd->variables.time_zone->gmt_sec_to_TIME(&t, ts);
- col_val= &el0->get_col_val(i);
- val_item= static_cast<Item_datetime_literal*>(col_val->item_expression);
- DBUG_ASSERT(val_item);
- if (*val_item > t)
- {
- val_item->set_time(&t);
- col_val->fixed= 0;
- }
- }
- col_val= &el1->get_col_val(i);
- if (!col_val->max_value)
- {
- ts= stat_trx_x->max_time() + 1;
- thd->variables.time_zone->gmt_sec_to_TIME(&t, ts);
- val_item= static_cast<Item_datetime_literal*>(col_val->item_expression);
- DBUG_ASSERT(val_item);
- if (*val_item < t)
- {
- val_item->set_time(&t);
- col_val->fixed= 0;
- }
- }
+ my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->s->table_name.str);
+ return true;
}
-}
-
-// setup at open() phase (TABLE_SHARE is initialized)
-bool partition_info::vers_setup_stats(THD * thd, bool is_create_table_ind)
-{
DBUG_ASSERT(part_type == VERSIONING_PARTITION);
- DBUG_ASSERT(vers_info && vers_info->initialized(false));
- DBUG_ASSERT(table && table->s);
-
- bool error= false;
+ DBUG_ASSERT(table->versioned(VERS_TIMESTAMP));
+ DBUG_ASSERT(num_columns == 1);
- TABLE_LIST tl(table, TL_READ);
- MDL_auto_lock mdl_lock(thd, tl);
- if (mdl_lock.acquire_error())
- return true;
-
- mysql_mutex_lock(&table->s->LOCK_rotation);
- if (table->s->busy_rotation)
+ if (!alter_add)
{
- table->s->vers_wait_rotation();
- vers_hist_part();
+ Field *row_end= table->vers_end_field();
+ part_field_list.push_back(row_end->field_name.str, thd->mem_root);
+ DBUG_ASSERT(part_field_list.elements == 1);
+ // needed in handle_list_of_fields()
+ row_end->flags|= GET_FIXED_FIELDS_FLAG;
}
- else
- {
- table->s->busy_rotation= true;
- mysql_mutex_unlock(&table->s->LOCK_rotation);
- DBUG_ASSERT(part_field_list.elements == num_columns);
-
- bool dont_stat= true;
- bool col_val_updated= false;
- // initialize stat_trx
- if (!table->s->stat_trx)
- {
- DBUG_ASSERT(partitions.elements > 1);
- table->s->stat_trx= static_cast<Vers_min_max_stats**>(
- alloc_root(&table->s->mem_root, sizeof(void *) * (partitions.elements * num_columns + 1)));
- table->s->stat_trx[partitions.elements * num_columns]= NULL;
- dont_stat= false;
- }
-
- // build freelist, scan min/max, assign hist_part
+ if (alter_add)
+ {
List_iterator<partition_element> it(partitions);
- partition_element *el= NULL, *prev;
- while ((prev= el, el= it++))
+ partition_element *el;
+ for(uint32 id= 0; ((el= it++)); id++)
{
- if (el->type() == partition_element::HISTORY && dont_stat)
- {
- if (el->id == table->s->hist_part_id)
- {
- vers_info->hist_part= el;
- break;
- }
- continue;
- }
-
- {
- Vers_min_max_stats *stat_trx_end= new (&table->s->mem_root)
- Vers_min_max_stats(&table->s->vers_end_field()->field_name, table->s);
- table->s->stat_trx[el->id * num_columns + STAT_TRX_END]= stat_trx_end;
- }
-
- if (!is_create_table_ind)
+ DBUG_ASSERT(el->type() != partition_element::CONVENTIONAL);
+ /* Newly added element is inserted before AS_OF_NOW. */
+ if (el->id == UINT_MAX32 || el->type() == partition_element::CURRENT)
{
+ el->id= id;
if (el->type() == partition_element::CURRENT)
- {
- uchar buf[8];
- Field_timestampf fld(buf, NULL, 0, Field::NONE, &table->vers_end_field()->field_name, NULL, 6);
- fld.set_max();
- vers_stat_trx(STAT_TRX_END, el).update_unguarded(&fld);
- el->empty= false;
- }
- else if (vers_scan_min_max(thd, el))
- {
- table->s->stat_trx= NULL; // may be a leak on endless table open
- error= true;
break;
- }
- if (!el->empty)
- {
- vers_update_col_vals(thd, prev, el);
- col_val_updated= true;
- }
- }
-
- if (el->type() == partition_element::CURRENT)
- break;
-
- DBUG_ASSERT(el->type() == partition_element::HISTORY);
-
- if (vers_info->hist_part)
- {
- if (!el->empty)
- goto set_hist_part;
- }
- else
- {
- set_hist_part:
- vers_info->hist_part= el;
- continue;
}
- } // while
-
- if (!error && !dont_stat)
- {
- if (col_val_updated)
- table->s->stat_serial++;
-
- table->s->hist_part_id= vers_info->hist_part->id;
}
- mysql_mutex_lock(&table->s->LOCK_rotation);
- mysql_cond_broadcast(&table->s->COND_rotation);
- table->s->busy_rotation= false;
}
- mysql_mutex_unlock(&table->s->LOCK_rotation);
- return error;
+ return false;
}
@@ -1682,11 +1294,11 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type,
DBUG_ASSERT(vers_info);
if (num_parts < 2 || !vers_info->now_part)
{
- DBUG_ASSERT(info && info->alias.str);
+ DBUG_ASSERT(info);
+ DBUG_ASSERT(info->alias.str);
my_error(ER_VERS_WRONG_PARTS, MYF(0), info->alias.str);
goto end;
}
- DBUG_ASSERT(vers_info->initialized(false));
DBUG_ASSERT(num_parts == partitions.elements);
}
i= 0;
@@ -1800,7 +1412,7 @@ bool partition_info::check_partition_info(THD *thd, handlerton **eng_type,
if (hist_parts > 1)
{
- if (unlikely(vers_info->limit == 0 && vers_info->interval == 0))
+ if (vers_info->limit == 0 && !vers_info->interval.is_set())
{
push_warning_printf(thd,
Sql_condition::WARN_LEVEL_WARN,