summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorMichael Widenius <monty@askmonty.org>2011-05-18 19:26:30 +0300
committerMichael Widenius <monty@askmonty.org>2011-05-18 19:26:30 +0300
commit3631146442bc09e5129334774b4bcdf2470a4b27 (patch)
treec812cd2520a8af89c95abf5e6fbec47998908c58 /sql
parent5c70f813f3d12974bf67d6ff98c0ff4b9ca71d7d (diff)
downloadmariadb-git-3631146442bc09e5129334774b4bcdf2470a4b27.tar.gz
Original idea from Zardosht Kasheff to add HA_CLUSTERED_INDEX
- Added a lot of code comments - Updated get_best_ror_intersec() to prefer index scan on not clustered keys before clustered keys. - Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY - For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered() - Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect. sql/ha_partition.h: Added comment with warning for code unsafe to use with multiple storage engines at the same time sql/handler.h: Added HA_CLUSTERED_INDEX. Documented primary_key_is_clustered() sql/opt_range.cc: Added code comments Updated get_best_ror_intersec() to ignore clustered keys. Optimized away cpk_scan_used and one instance of current_thd (Simpler code) Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY sql/sql_select.cc: Changed comment to #ifdef For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered() (Change is smaller than what it looks beause of indentation change) sql/sql_table.cc: Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect. storage/innobase/handler/ha_innodb.h: Added support for HA_CLUSTERED_INDEX storage/innodb_plugin/handler/ha_innodb.cc: Added support for HA_CLUSTERED_INDEX storage/xtradb/handler/ha_innodb.cc: Added support for HA_CLUSTERED_INDEX
Diffstat (limited to 'sql')
-rw-r--r--sql/ha_partition.h4
-rw-r--r--sql/handler.h26
-rw-r--r--sql/opt_range.cc54
-rw-r--r--sql/sql_select.cc77
-rw-r--r--sql/sql_table.cc3
5 files changed, 107 insertions, 57 deletions
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index b1e39cf4d22..49aa7103e1c 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -877,6 +877,10 @@ public:
*/
virtual ulong index_flags(uint inx, uint part, bool all_parts) const
{
+ /*
+ The following code is not safe if you are using different
+ storage engines or different index types per partition.
+ */
return m_file[0]->index_flags(inx, part, all_parts);
}
diff --git a/sql/handler.h b/sql/handler.h
index d929181088a..ea709d9dddc 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -161,8 +161,11 @@
*/
#define HA_KEY_SCAN_NOT_ROR 128
#define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */
-
-
+/*
+ Data is clustered on this key. This means that when you read the key
+ you also get the row data in the same block.
+*/
+#define HA_CLUSTERED_INDEX 512
/*
bits in alter_table_flags:
@@ -2311,9 +2314,22 @@ public:
/*
- @retval TRUE Primary key (if there is one) is clustered
- key covering all fields
- @retval FALSE otherwise
+ Check if the primary key (if there is one) is a clustered key covering
+ all fields. This means:
+
+ - Data is stored together with the primary key (no secondary lookup
+ needed to find the row data). The optimizer uses this to find out
+ the cost of fetching data.
+ - The primary key is part of each secondary key and is used
+ to find the row data in the primary index when reading trough
+ secondary indexes.
+ - When doing a HA_KEYREAD_ONLY we get also all the primary key parts
+ into the row. This is critical property used by index_merge.
+
+ For a clustered primary key, index_flags() returns also HA_CLUSTERED_INDEX
+
+ @retval TRUE yes
+ @retval FALSE No.
*/
virtual bool primary_key_is_clustered() { return FALSE; }
virtual int cmp_ref(const uchar *ref1, const uchar *ref2)
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 102fd881d9f..13afffd6ba9 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -1816,6 +1816,12 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
DBUG_VOID_RETURN;
}
+/*
+ QUICK_INDEX_SORT_SELECT works as follows:
+ - Do index scans, accumulate rowids in the Unique object
+ (Unique will also sort and de-duplicate rowids)
+ - Use rowids from unique to run a disk-ordered sweep
+*/
QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
TABLE *table)
@@ -1848,7 +1854,18 @@ QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
if (head->file->primary_key_is_clustered() &&
quick_sel_range->index == head->s->primary_key)
{
- /* A quick_select over a clustered primary key is handled specifically */
+ /*
+ A quick_select over a clustered primary key is handled specifically
+ Here we assume:
+ - PK columns are included in any other merged index
+ - Scan on the PK is disk-ordered.
+ (not meeting #2 will only cause performance degradation)
+
+ We could treat clustered PK as any other index, but that would
+ be inefficient. There is no point in doing scan on
+ CPK, remembering the rowid, then making rnd_pos() call with
+ that rowid.
+ */
pk_quick_select= quick_sel_range;
DBUG_RETURN(0);
}
@@ -4298,11 +4315,19 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records)
DBUG_ENTER("get_sweep_read_cost");
if (param->table->file->primary_key_is_clustered())
{
+ /*
+ We are using the primary key to find the rows.
+ Calculate the cost for this.
+ */
result= param->table->file->read_time(param->table->s->primary_key,
(uint)records, records);
}
else
{
+ /*
+ Rows will be retreived with rnd_pos(). Caluclate the expected
+ cost for this.
+ */
double n_blocks=
ceil(ulonglong2double(param->table->file->stats.data_file_length) /
IO_SIZE);
@@ -5013,7 +5038,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
if ((*index_scan)->keynr == table->s->primary_key)
{
common->cpk_scan= cpk_scan= *index_scan;
- break;
+ break;
}
}
}
@@ -6187,7 +6212,6 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
ROR_SCAN_INFO **cur_ror_scan;
ROR_SCAN_INFO *cpk_scan= NULL;
uint cpk_no;
- bool cpk_scan_used= FALSE;
if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
sizeof(ROR_SCAN_INFO*)*
@@ -6199,11 +6223,20 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
{
ROR_SCAN_INFO *scan;
+ uint key_no;
if (!tree->ror_scans_map.is_set(idx))
continue;
+ key_no= param->real_keynr[idx];
+ if (key_no != cpk_no &&
+ param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX)
+ {
+ /* Ignore clustering keys */
+ tree->n_ror_scans--;
+ continue;
+ }
if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
return NULL;
- if (param->real_keynr[idx] == cpk_no)
+ if (key_no == cpk_no)
{
cpk_scan= scan;
tree->n_ror_scans--;
@@ -6289,15 +6322,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
{
if (ror_intersect_add(intersect, cpk_scan, TRUE) &&
(intersect->total_cost < min_cost))
- {
- cpk_scan_used= TRUE;
intersect_best= intersect; //just set pointer here
- }
}
+ else
+ cpk_scan= 0; // Don't use cpk_scan
/* Ok, return ROR-intersect plan if we have found one */
TRP_ROR_INTERSECT *trp= NULL;
- if (min_cost < read_time && (cpk_scan_used || best_num > 1))
+ if (min_cost < read_time && (cpk_scan || best_num > 1))
{
if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
DBUG_RETURN(trp);
@@ -6316,7 +6348,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
set_if_smaller(param->table->quick_condition_rows, best_rows);
trp->records= best_rows;
trp->index_scan_costs= intersect_best->index_scan_costs;
- trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
+ trp->cpk_scan= cpk_scan;
DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
"cost %g, records %lu",
trp->read_cost, (ulong) trp->records));
@@ -9511,10 +9543,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
bool pk_is_clustered= file->primary_key_is_clustered();
if (index_only &&
(file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
- !(pk_is_clustered && keynr == param->table->s->primary_key))
+ !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
*mrr_flags |= HA_MRR_INDEX_ONLY;
- if (current_thd->lex->sql_command != SQLCOM_SELECT)
+ if (param->thd->lex->sql_command != SQLCOM_SELECT)
*mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
*bufsize= param->thd->variables.mrr_buff_size;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 103a228ebb6..908f518fe41 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8550,17 +8550,19 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
else if (!table->covering_keys.is_clear_all() &&
!(tab->select && tab->select->quick))
{ // Only read index tree
+#ifdef BAD_OPTIMIZATION
/*
- It has turned out that the below change, while speeding things
- up for disk-bound loads, slows them down for cases when the data
- is in disk cache (see BUG#35850):
- // See bug #26447: "Using the clustered index for a table scan
- // is always faster than using a secondary index".
+ It has turned out that the below change, while speeding things
+ up for disk-bound loads, slows them down for cases when the data
+ is in disk cache (see BUG#35850):
+ See bug #26447: "Using the clustered index for a table scan
+ is always faster than using a secondary index".
+ */
if (table->s->primary_key != MAX_KEY &&
table->file->primary_key_is_clustered())
tab->index= table->s->primary_key;
else
- */
+#endif
tab->index=find_shortest_key(table, & table->covering_keys);
tab->read_first_record= join_read_first;
/* Read with index_first / index_next */
@@ -16525,9 +16527,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
*/
DBUG_ASSERT (ref_key != (int) nr);
- bool is_covering= table->covering_keys.is_set(nr) ||
- (nr == table->s->primary_key &&
- table->file->primary_key_is_clustered());
+ bool is_covering= (table->covering_keys.is_set(nr) ||
+ (table->file->index_flags(nr, 0, 1) &
+ HA_CLUSTERED_INDEX));
/*
Don't use an index scan with ORDER BY without limit.
@@ -16680,42 +16682,37 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
/*
filesort() and join cache are usually faster than reading in
index order and not using join cache, except in case that chosen
- index is clustered primary key.
+ index is clustered key.
*/
- if ((select_limit >= table_records) &&
- (tab->type == JT_ALL &&
- tab->join->tables > tab->join->const_tables + 1) &&
- ((unsigned) best_key != table->s->primary_key ||
- !table->file->primary_key_is_clustered()))
+ if (best_key < 0 ||
+ ((select_limit >= table_records) &&
+ (tab->type == JT_ALL &&
+ tab->join->tables > tab->join->const_tables + 1) &&
+ !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
goto use_filesort;
- if (best_key >= 0)
+ if (table->quick_keys.is_set(best_key) && best_key != ref_key)
{
- if (table->quick_keys.is_set(best_key) && best_key != ref_key)
- {
- key_map map;
- map.clear_all(); // Force the creation of quick select
- map.set_bit(best_key); // only best_key.
- select->quick= 0;
- select->test_quick_select(join->thd, map, 0,
- join->select_options & OPTION_FOUND_ROWS ?
- HA_POS_ERROR :
- join->unit->select_limit_cnt,
- TRUE, FALSE);
- }
- order_direction= best_key_direction;
- /*
- saved_best_key_parts is actual number of used keyparts found by the
- test_if_order_by_key function. It could differ from keyinfo->key_parts,
- thus we have to restore it in case of desc order as it affects
- QUICK_SELECT_DESC behaviour.
- */
- used_key_parts= (order_direction == -1) ?
- saved_best_key_parts : best_key_parts;
+ key_map map;
+ map.clear_all(); // Force the creation of quick select
+ map.set_bit(best_key); // only best_key.
+ select->quick= 0;
+ select->test_quick_select(join->thd, map, 0,
+ join->select_options & OPTION_FOUND_ROWS ?
+ HA_POS_ERROR :
+ join->unit->select_limit_cnt,
+ TRUE, FALSE);
}
- else
- goto use_filesort;
- }
+ order_direction= best_key_direction;
+ /*
+ saved_best_key_parts is actual number of used keyparts found by the
+ test_if_order_by_key function. It could differ from keyinfo->key_parts,
+ thus we have to restore it in case of desc order as it affects
+ QUICK_SELECT_DESC behaviour.
+ */
+ used_key_parts= (order_direction == -1) ?
+ saved_best_key_parts : best_key_parts;
+ }
check_reverse_order:
DBUG_ASSERT(order_direction != 0);
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 42f2abeca1e..c857fb3de8c 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -7983,7 +7983,8 @@ copy_data_between_tables(TABLE *from,TABLE *to,
if (order)
{
- if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered())
+ if (to->s->primary_key != MAX_KEY &&
+ to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX)
{
char warn_buff[MYSQL_ERRMSG_SIZE];
my_snprintf(warn_buff, sizeof(warn_buff),