diff options
author | Michael Widenius <monty@askmonty.org> | 2011-05-18 19:26:30 +0300 |
---|---|---|
committer | Michael Widenius <monty@askmonty.org> | 2011-05-18 19:26:30 +0300 |
commit | 3631146442bc09e5129334774b4bcdf2470a4b27 (patch) | |
tree | c812cd2520a8af89c95abf5e6fbec47998908c58 /sql | |
parent | 5c70f813f3d12974bf67d6ff98c0ff4b9ca71d7d (diff) | |
download | mariadb-git-3631146442bc09e5129334774b4bcdf2470a4b27.tar.gz |
Original idea from Zardosht Kasheff to add HA_CLUSTERED_INDEX
- Added a lot of code comments
- Updated get_best_ror_intersec() to prefer index scan on not clustered keys before clustered keys.
- Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY
- For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered()
- Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect.
sql/ha_partition.h:
Added comment with warning for code unsafe to use with multiple storage engines at the same time
sql/handler.h:
Added HA_CLUSTERED_INDEX.
Documented primary_key_is_clustered()
sql/opt_range.cc:
Added code comments
Updated get_best_ror_intersec() to ignore clustered keys.
Optimized away cpk_scan_used and one instance of current_thd (Simpler code)
Use HA_CLUSTERED_INDEX to define if one should use HA_MRR_INDEX_ONLY
sql/sql_select.cc:
Changed comment to #ifdef
For test of using index or filesort to resolve ORDER BY, use HA_CLUSTERED_INDEX flag instead of primary_key_is_clustered()
(Change is smaller than what it looks beause of indentation change)
sql/sql_table.cc:
Use HA_TABLE_SCAN_ON_INDEX instead of primary_key_is_clustered() to decide if ALTER TABLE ... ORDER BY will have any effect.
storage/innobase/handler/ha_innodb.h:
Added support for HA_CLUSTERED_INDEX
storage/innodb_plugin/handler/ha_innodb.cc:
Added support for HA_CLUSTERED_INDEX
storage/xtradb/handler/ha_innodb.cc:
Added support for HA_CLUSTERED_INDEX
Diffstat (limited to 'sql')
-rw-r--r-- | sql/ha_partition.h | 4 | ||||
-rw-r--r-- | sql/handler.h | 26 | ||||
-rw-r--r-- | sql/opt_range.cc | 54 | ||||
-rw-r--r-- | sql/sql_select.cc | 77 | ||||
-rw-r--r-- | sql/sql_table.cc | 3 |
5 files changed, 107 insertions, 57 deletions
diff --git a/sql/ha_partition.h b/sql/ha_partition.h index b1e39cf4d22..49aa7103e1c 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -877,6 +877,10 @@ public: */ virtual ulong index_flags(uint inx, uint part, bool all_parts) const { + /* + The following code is not safe if you are using different + storage engines or different index types per partition. + */ return m_file[0]->index_flags(inx, part, all_parts); } diff --git a/sql/handler.h b/sql/handler.h index d929181088a..ea709d9dddc 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -161,8 +161,11 @@ */ #define HA_KEY_SCAN_NOT_ROR 128 #define HA_DO_INDEX_COND_PUSHDOWN 256 /* Supports Index Condition Pushdown */ - - +/* + Data is clustered on this key. This means that when you read the key + you also get the row data in the same block. +*/ +#define HA_CLUSTERED_INDEX 512 /* bits in alter_table_flags: @@ -2311,9 +2314,22 @@ public: /* - @retval TRUE Primary key (if there is one) is clustered - key covering all fields - @retval FALSE otherwise + Check if the primary key (if there is one) is a clustered key covering + all fields. This means: + + - Data is stored together with the primary key (no secondary lookup + needed to find the row data). The optimizer uses this to find out + the cost of fetching data. + - The primary key is part of each secondary key and is used + to find the row data in the primary index when reading trough + secondary indexes. + - When doing a HA_KEYREAD_ONLY we get also all the primary key parts + into the row. This is critical property used by index_merge. + + For a clustered primary key, index_flags() returns also HA_CLUSTERED_INDEX + + @retval TRUE yes + @retval FALSE No. */ virtual bool primary_key_is_clustered() { return FALSE; } virtual int cmp_ref(const uchar *ref1, const uchar *ref2) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 102fd881d9f..13afffd6ba9 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -1816,6 +1816,12 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() DBUG_VOID_RETURN; } +/* + QUICK_INDEX_SORT_SELECT works as follows: + - Do index scans, accumulate rowids in the Unique object + (Unique will also sort and de-duplicate rowids) + - Use rowids from unique to run a disk-ordered sweep +*/ QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param, TABLE *table) @@ -1848,7 +1854,18 @@ QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range) if (head->file->primary_key_is_clustered() && quick_sel_range->index == head->s->primary_key) { - /* A quick_select over a clustered primary key is handled specifically */ + /* + A quick_select over a clustered primary key is handled specifically + Here we assume: + - PK columns are included in any other merged index + - Scan on the PK is disk-ordered. + (not meeting #2 will only cause performance degradation) + + We could treat clustered PK as any other index, but that would + be inefficient. There is no point in doing scan on + CPK, remembering the rowid, then making rnd_pos() call with + that rowid. + */ pk_quick_select= quick_sel_range; DBUG_RETURN(0); } @@ -4298,11 +4315,19 @@ double get_sweep_read_cost(const PARAM *param, ha_rows records) DBUG_ENTER("get_sweep_read_cost"); if (param->table->file->primary_key_is_clustered()) { + /* + We are using the primary key to find the rows. + Calculate the cost for this. + */ result= param->table->file->read_time(param->table->s->primary_key, (uint)records, records); } else { + /* + Rows will be retreived with rnd_pos(). Caluclate the expected + cost for this. + */ double n_blocks= ceil(ulonglong2double(param->table->file->stats.data_file_length) / IO_SIZE); @@ -5013,7 +5038,7 @@ bool prepare_search_best_index_intersect(PARAM *param, if ((*index_scan)->keynr == table->s->primary_key) { common->cpk_scan= cpk_scan= *index_scan; - break; + break; } } } @@ -6187,7 +6212,6 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, ROR_SCAN_INFO **cur_ror_scan; ROR_SCAN_INFO *cpk_scan= NULL; uint cpk_no; - bool cpk_scan_used= FALSE; if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, sizeof(ROR_SCAN_INFO*)* @@ -6199,11 +6223,20 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) { ROR_SCAN_INFO *scan; + uint key_no; if (!tree->ror_scans_map.is_set(idx)) continue; + key_no= param->real_keynr[idx]; + if (key_no != cpk_no && + param->table->file->index_flags(key_no,0,0) & HA_CLUSTERED_INDEX) + { + /* Ignore clustering keys */ + tree->n_ror_scans--; + continue; + } if (!(scan= make_ror_scan(param, idx, tree->keys[idx]))) return NULL; - if (param->real_keynr[idx] == cpk_no) + if (key_no == cpk_no) { cpk_scan= scan; tree->n_ror_scans--; @@ -6289,15 +6322,14 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, { if (ror_intersect_add(intersect, cpk_scan, TRUE) && (intersect->total_cost < min_cost)) - { - cpk_scan_used= TRUE; intersect_best= intersect; //just set pointer here - } } + else + cpk_scan= 0; // Don't use cpk_scan /* Ok, return ROR-intersect plan if we have found one */ TRP_ROR_INTERSECT *trp= NULL; - if (min_cost < read_time && (cpk_scan_used || best_num > 1)) + if (min_cost < read_time && (cpk_scan || best_num > 1)) { if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) DBUG_RETURN(trp); @@ -6316,7 +6348,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, set_if_smaller(param->table->quick_condition_rows, best_rows); trp->records= best_rows; trp->index_scan_costs= intersect_best->index_scan_costs; - trp->cpk_scan= cpk_scan_used? cpk_scan: NULL; + trp->cpk_scan= cpk_scan; DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:" "cost %g, records %lu", trp->read_cost, (ulong) trp->records)); @@ -9511,10 +9543,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, bool pk_is_clustered= file->primary_key_is_clustered(); if (index_only && (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) && - !(pk_is_clustered && keynr == param->table->s->primary_key)) + !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX)) *mrr_flags |= HA_MRR_INDEX_ONLY; - if (current_thd->lex->sql_command != SQLCOM_SELECT) + if (param->thd->lex->sql_command != SQLCOM_SELECT) *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; *bufsize= param->thd->variables.mrr_buff_size; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 103a228ebb6..908f518fe41 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8550,17 +8550,19 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) else if (!table->covering_keys.is_clear_all() && !(tab->select && tab->select->quick)) { // Only read index tree +#ifdef BAD_OPTIMIZATION /* - It has turned out that the below change, while speeding things - up for disk-bound loads, slows them down for cases when the data - is in disk cache (see BUG#35850): - // See bug #26447: "Using the clustered index for a table scan - // is always faster than using a secondary index". + It has turned out that the below change, while speeding things + up for disk-bound loads, slows them down for cases when the data + is in disk cache (see BUG#35850): + See bug #26447: "Using the clustered index for a table scan + is always faster than using a secondary index". + */ if (table->s->primary_key != MAX_KEY && table->file->primary_key_is_clustered()) tab->index= table->s->primary_key; else - */ +#endif tab->index=find_shortest_key(table, & table->covering_keys); tab->read_first_record= join_read_first; /* Read with index_first / index_next */ @@ -16525,9 +16527,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, */ DBUG_ASSERT (ref_key != (int) nr); - bool is_covering= table->covering_keys.is_set(nr) || - (nr == table->s->primary_key && - table->file->primary_key_is_clustered()); + bool is_covering= (table->covering_keys.is_set(nr) || + (table->file->index_flags(nr, 0, 1) & + HA_CLUSTERED_INDEX)); /* Don't use an index scan with ORDER BY without limit. @@ -16680,42 +16682,37 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, /* filesort() and join cache are usually faster than reading in index order and not using join cache, except in case that chosen - index is clustered primary key. + index is clustered key. */ - if ((select_limit >= table_records) && - (tab->type == JT_ALL && - tab->join->tables > tab->join->const_tables + 1) && - ((unsigned) best_key != table->s->primary_key || - !table->file->primary_key_is_clustered())) + if (best_key < 0 || + ((select_limit >= table_records) && + (tab->type == JT_ALL && + tab->join->tables > tab->join->const_tables + 1) && + !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX))) goto use_filesort; - if (best_key >= 0) + if (table->quick_keys.is_set(best_key) && best_key != ref_key) { - if (table->quick_keys.is_set(best_key) && best_key != ref_key) - { - key_map map; - map.clear_all(); // Force the creation of quick select - map.set_bit(best_key); // only best_key. - select->quick= 0; - select->test_quick_select(join->thd, map, 0, - join->select_options & OPTION_FOUND_ROWS ? - HA_POS_ERROR : - join->unit->select_limit_cnt, - TRUE, FALSE); - } - order_direction= best_key_direction; - /* - saved_best_key_parts is actual number of used keyparts found by the - test_if_order_by_key function. It could differ from keyinfo->key_parts, - thus we have to restore it in case of desc order as it affects - QUICK_SELECT_DESC behaviour. - */ - used_key_parts= (order_direction == -1) ? - saved_best_key_parts : best_key_parts; + key_map map; + map.clear_all(); // Force the creation of quick select + map.set_bit(best_key); // only best_key. + select->quick= 0; + select->test_quick_select(join->thd, map, 0, + join->select_options & OPTION_FOUND_ROWS ? + HA_POS_ERROR : + join->unit->select_limit_cnt, + TRUE, FALSE); } - else - goto use_filesort; - } + order_direction= best_key_direction; + /* + saved_best_key_parts is actual number of used keyparts found by the + test_if_order_by_key function. It could differ from keyinfo->key_parts, + thus we have to restore it in case of desc order as it affects + QUICK_SELECT_DESC behaviour. + */ + used_key_parts= (order_direction == -1) ? + saved_best_key_parts : best_key_parts; + } check_reverse_order: DBUG_ASSERT(order_direction != 0); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 42f2abeca1e..c857fb3de8c 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -7983,7 +7983,8 @@ copy_data_between_tables(TABLE *from,TABLE *to, if (order) { - if (to->s->primary_key != MAX_KEY && to->file->primary_key_is_clustered()) + if (to->s->primary_key != MAX_KEY && + to->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) { char warn_buff[MYSQL_ERRMSG_SIZE]; my_snprintf(warn_buff, sizeof(warn_buff), |