author      Monty <monty@mariadb.org>                2021-10-06 12:53:18 +0300
committer   Sergei Petrunia <sergey@mariadb.com>     2022-09-08 11:21:55 +0300
commit      0809587fcc5844d46f590dacdb0025e996602d80
tree        1d43a24b77150801e0061aae1e113c129a44756d
parent      b83877d217a7d816ce4007233c150ed431b0298e
download    mariadb-git-0809587fcc5844d46f590dacdb0025e996602d80.tar.gz
Fix calculation of selectivity
calculate_cond_selectivity_for_table() is largely rewritten:
- Process keys in the order of rows found, smaller ranges first. If two
  ranges have an equal number of rows, use the one with more key parts;
  a sketch of this ordering follows the list. This lets us mark more
  fields as handled, so that they are excluded from further selectivity
  calculations. See cmp_quick_ranges().
- Ignore keys whose fields were already used by previous keys.
- Don't use rec_per_key[] to calculate selectivity for smaller
  secondary key parts. This does not work, because a rec_per_key[]
  value is calculated in the context of the previous key parts, not for
  the key part itself. The one exception is when the previous key parts
  are constants.
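
The ordering rule in the first point above can be shown in isolation.
Below is a minimal, self-contained C++ sketch of the comparison that
cmp_quick_ranges() performs: rows ascending, with more key parts winning
a tie. The RangeEstimate struct and the sample data are hypothetical
stand-ins for TABLE::OPT_RANGE, not the actual server code:

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  /* Hypothetical stand-in for TABLE::OPT_RANGE; the patch's struct
     carries the same rows and key_parts fields used for ordering. */
  struct RangeEstimate
  {
    const char *index_name;
    unsigned long long rows;   /* estimated rows matching the range */
    unsigned key_parts;        /* key parts covered by the range    */
  };

  /* The ordering cmp_quick_ranges() implements: fewer rows first; on
     a tie, prefer the range with more key parts, so more fields can
     be marked as handled for later selectivity calculations. */
  static bool quick_range_less(const RangeEstimate &a,
                               const RangeEstimate &b)
  {
    if (a.rows != b.rows)
      return a.rows < b.rows;
    return a.key_parts > b.key_parts;
  }

  int main()
  {
    std::vector<RangeEstimate> ranges=
      {{"key_b", 340, 1}, {"key_c", 104, 1}, {"key_a", 104, 2}};
    std::sort(ranges.begin(), ranges.end(), quick_range_less);
    for (const RangeEstimate &r : ranges)
      std::printf("%s: rows=%llu key_parts=%u\n",
                  r.index_name, r.rows, r.key_parts);
    /* Prints key_a (104 rows, 2 parts) before key_c (104 rows,
       1 part), then key_b (340 rows). */
    return 0;
  }

Sorting key_a before key_c on the tie means the two-part index gets to
mark both of its columns as handled before the one-part index is
considered.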
Other things:
- Ensure that select->cond_selectivity is always between 0 and 1.
- Ensure that select->opt_range_condition_rows is never updated to a
  higher value. It is initially set to the number of rows in the table.
  (A condensed sketch of these two invariants follows this list.)
- We now store in table->opt_range_condition_rows the lowest number of
  rows that any row-read method has found so far. Previously this was
  only done for QUICK_SELECT_I::QS_TYPE_ROR_UNION and
  QUICK_SELECT_I::QS_TYPE_INDEX_MERGE.
  Now it is done for many more methods. See
  calculate_cond_selectivity_for_table() for details.
- Calculate and use selectivity for the first key part of a multi-part
  key if that first key part is a constant, as in
  WHERE key1_part1=5 AND key2_part1=5. If key1 is used, we can still
  use the selectivity for key2.
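
The first two points above amount to two invariants: cond_selectivity
stays within [0, 1], and opt_range_condition_rows only ever decreases
from its initial value of the table row count. A condensed C++ sketch
follows, modeled on the set_cond_selectivity() and
multiply_cond_selectivity() helpers this patch adds to TABLE; the
TableStats name and the offer_row_estimate() helper are hypothetical
simplifications:

  #include <cassert>

  /* TableStats and offer_row_estimate() are hypothetical names; the
     patch implements the same checks via TABLE::set_cond_selectivity(),
     TABLE::multiply_cond_selectivity() and set_if_smaller() calls. */
  struct TableStats
  {
    double cond_selectivity;                     /* kept within [0, 1] */
    unsigned long long opt_range_condition_rows; /* only ever lowered  */

    explicit TableStats(unsigned long long table_rows)
      : cond_selectivity(1.0),
        opt_range_condition_rows(table_rows)  /* starts at table size */
    {}

    void set_cond_selectivity(double s)
    {
      assert(s >= 0.0 && s <= 1.0);
      cond_selectivity= s;
    }
    void multiply_cond_selectivity(double s)
    {
      /* A product of factors in [0, 1] stays in [0, 1], so checking
         each factor is enough; no extra clamping is needed. */
      assert(s >= 0.0 && s <= 1.0);
      cond_selectivity*= s;
    }
    /* A row-read method may only improve (lower) the estimate. */
    void offer_row_estimate(unsigned long long rows)
    {
      if (rows < opt_range_condition_rows)
        opt_range_condition_rows= rows;
    }
  };

  int main()
  {
    TableStats t(11000);       /* table size from the new test below */
    t.offer_row_estimate(7);   /* an index_merge estimate is kept    */
    t.offer_row_estimate(104); /* a worse estimate is ignored        */
    t.multiply_cond_selectivity(7.0 / 11000.0);
    assert(t.opt_range_condition_rows == 7 && t.cond_selectivity <= 1.0);
    return 0;
  }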
Changes in test results:
- 'filtered' is slightly changed, usually to something slightly smaller.
- In a few cases the table order changed for GROUP BY queries. This is
  because the estimated number of resulting rows from a GROUP BY query
  with MIN/MAX is now smaller.
- A few chosen indexes changed, as we now prefer the index with more
  key parts when the number of resulting rows is the same.
-rw-r--r--  mysql-test/main/group_min_max.result            4
-rw-r--r--  mysql-test/main/mdev-25830.result               2
-rw-r--r--  mysql-test/main/opt_trace_index_merge.result    6
-rw-r--r--  mysql-test/main/opt_trace_selectivity.result  394
-rw-r--r--  mysql-test/main/opt_trace_selectivity.test     52
-rw-r--r--  mysql-test/main/rowid_filter_innodb.result      2
-rw-r--r--  sql/opt_range.cc                              260
-rw-r--r--  sql/opt_range.h                                 7
-rw-r--r--  sql/sql_select.cc                              37
-rw-r--r--  sql/table.h                                    14
10 files changed, 674 insertions, 104 deletions
diff --git a/mysql-test/main/group_min_max.result b/mysql-test/main/group_min_max.result index 975b6ee9a86..9fd9ecf6ce2 100644 --- a/mysql-test/main/group_min_max.result +++ b/mysql-test/main/group_min_max.result @@ -2460,8 +2460,8 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT 1 FROM t1 AS t1_outer WHERE a IN (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t1_outer index a a 10 NULL 15 Using where; Using index -1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t1_outer.a 1 +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 +1 PRIMARY t1_outer ref a a 5 <subquery2>.max(b) 3 Using index 2 MATERIALIZED t1 range a a 5 NULL 5 Using where; Using index EXPLAIN SELECT 1 FROM t1 AS t1_outer GROUP BY a HAVING a > (SELECT max(b) FROM t1 GROUP BY a HAVING a < 2); diff --git a/mysql-test/main/mdev-25830.result b/mysql-test/main/mdev-25830.result index e62d1ff3f55..2c606205565 100644 --- a/mysql-test/main/mdev-25830.result +++ b/mysql-test/main/mdev-25830.result @@ -47,7 +47,7 @@ WHERE task2.`sys_id` LIKE '8e7792a7dbfffb00fff8a345ca961934%' ORDER BY sysapproval_approver0.`order` LIMIT 0, 50 ; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE task2 range PRIMARY,sys_class_name_2,sys_domain_path PRIMARY 96 NULL 1 0.00 98.00 100.00 Using where; Using temporary; Using filesort +1 SIMPLE task2 range PRIMARY,sys_class_name_2,sys_domain_path PRIMARY 96 NULL 1 0.00 100.00 100.00 Using where; Using temporary; Using filesort 1 SIMPLE task1 ref PRIMARY,task_parent,sys_class_name_2,sys_domain_path task_parent 99 test.task2.sys_id 1 NULL 100.00 NULL Using index condition; Using where 1 SIMPLE sysapproval_approver0 ref sysapproval_approver_ref5,sys_domain_path,sysapproval_approver_CHG1975376 sysapproval_approver_ref5 99 test.task1.sys_id 1 NULL 100.00 NULL Using index condition; Using where drop table sysapproval_approver,task; diff --git a/mysql-test/main/opt_trace_index_merge.result b/mysql-test/main/opt_trace_index_merge.result index 885740d59c3..40136fe300a 100644 --- a/mysql-test/main/opt_trace_index_merge.result +++ b/mysql-test/main/opt_trace_index_merge.result @@ -193,7 +193,11 @@ explain select * from t1 where a=1 or b=1 { } }, { - "selectivity_for_indexes": [], + "selectivity_for_indexes": [ + { + "use_opt_range_condition_rows_selectivity": 0.002 + } + ], "selectivity_for_columns": [], "cond_selectivity": 0.002 } diff --git a/mysql-test/main/opt_trace_selectivity.result b/mysql-test/main/opt_trace_selectivity.result new file mode 100644 index 00000000000..54f05885049 --- /dev/null +++ b/mysql-test/main/opt_trace_selectivity.result @@ -0,0 +1,394 @@ +create or replace table t1 (a int, b int, c int, key(a,c), key(b,c), key (c,b)) engine=aria; +insert into t1 select seq/100+1, mod(seq,10), mod(seq,15) from seq_1_to_10000; +insert into t1 select seq/100+1, mod(seq,10), 10 from seq_1_to_1000; +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +select count(*) from t1 where a=2; +count(*) +200 +select count(*) from t1 where b=5; +count(*) +1100 +select count(*) from t1 where c=5; +count(*) +667 +select count(*) from t1 where c=10; +count(*) +1667 +select count(*) from t1 where a=2 and b=5; +count(*) +20 +select count(*) from t1 where c=10 and b=5; +count(*) +433 +select count(*) from t1 where c=5 and b=5; +count(*) +334 +set optimizer_trace="enabled=on"; +select count(*) from t1 where a=2 and b=5 and 
c=10; +count(*) +14 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) +[ + + [ + + { + "plan_prefix": + [ + ], + "get_costs_for_tables": + [ + + { + "best_access_path": + { + "table": "t1", + "considered_access_paths": + [ + + { + "access_type": "ref", + "index": "a", + "used_range_estimates": true, + "rows": 104, + "cost": 104.16562, + "chosen": true + }, + + { + "access_type": "ref", + "index": "b", + "used_range_estimates": true, + "rows": 340, + "cost": 340.2577963, + "chosen": false, + "cause": "cost" + }, + + { + "access_type": "ref", + "index": "c", + "used_range_estimates": true, + "rows": 632, + "cost": 632.3718449, + "chosen": false, + "cause": "cost" + }, + + { + "access_type": "index_merge", + "resulting_rows": 7, + "cost": 2.173416331, + "chosen": true + } + ], + "chosen_access_method": + { + "type": "index_merge", + "records": 7, + "cost": 2.173416331, + "uses_join_buffering": false + } + } + } + ] + }, + + { + "plan_prefix": + [ + ], + "table": "t1", + "rows_for_plan": 7, + "cost_for_plan": 3.573416331, + "estimated_join_cardinality": 7 + } + ] +] +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.009454545 + }, + + { + "index_name": "b", + "selectivity_from_index": 0.1 + }, + + { + "use_opt_range_condition_rows_selectivity": 6.363636e-4 + } + ] +] +select count(*) from t1 where a=2 and b=5 and c=5; +count(*) +3 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) +[ + + [ + + { + "plan_prefix": + [ + ], + "get_costs_for_tables": + [ + + { + "best_access_path": + { + "table": "t1", + "considered_access_paths": + [ + + { + "access_type": "ref", + "index": "a", + "used_range_estimates": true, + "rows": 6, + "cost": 6.127343464, + "chosen": true + }, + + { + "access_type": "ref", + "index": "b", + "used_range_estimates": true, + "rows": 232, + "cost": 232.2156139, + "chosen": false, + "cause": "cost" + }, + + { + "access_type": "ref", + "index": "c", + "used_range_estimates": true, + "rows": 293, + "cost": 293.2394392, + "chosen": false, + "cause": "cost" + }, + + { + "access_type": "index_merge", + "resulting_rows": 0.6, + "cost": 2.172957403, + "chosen": true + } + ], + "chosen_access_method": + { + "type": "index_merge", + "records": 0.6, + "cost": 2.172957403, + "uses_join_buffering": false + } + } + } + ] + }, + + { + "plan_prefix": + [ + ], + "table": "t1", + "rows_for_plan": 0.6, + "cost_for_plan": 2.292957403, + "estimated_join_cardinality": 0.6 + } + ] +] +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 5.454545e-4 + }, + + { + "index_name": "b", + "selectivity_from_index": 0.1 + } + ] +] +# Ensure that we only use selectivity from non used index for simple cases +select count(*) from t1 where (a=2 and b= 5); +count(*) +20 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + 
{ + "index_name": "a", + "selectivity_from_index": 0.017545455 + }, + + { + "index_name": "b", + "selectivity_from_index": 0.073181818 + } + ] +] +# All of the following should have selectivity=1 for index 'b' +select count(*) from t1 where (a=2 and b between 0 and 100); +count(*) +200 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.017545455 + }, + + { + "index_name": "b", + "selectivity_from_index": 1 + } + ] +] +select count(*) from t1 where (a in (2,3) and b between 0 and 100); +count(*) +400 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.035090909 + }, + + { + "index_name": "b", + "selectivity_from_index": 1 + } + ] +] +select count(*) from t1 where (a>2 and b between 0 and 100); +count(*) +10702 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.973909091 + }, + + { + "index_name": "b", + "selectivity_from_index": 1 + } + ] +] +select count(*) from t1 where (a>=2 and b between 0 and 100); +count(*) +10902 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.991454545 + }, + + { + "index_name": "b", + "selectivity_from_index": 1 + } + ] +] +select count(*) from t1 where (a<=2 and b between 0 and 100); +count(*) +298 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.026181818 + }, + + { + "index_name": "b", + "selectivity_from_index": 1 + } + ] +] +select count(*) from t1 where (a<2 and b between 0 and 100); +count(*) +98 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.008636364 + }, + + { + "index_name": "b", + "selectivity_from_index": 1 + } + ] +] +select count(*) from t1 where (a between 2 and 3 and b between 0 and 100); +count(*) +400 +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) +[ + + [ + + { + "index_name": "a", + "selectivity_from_index": 0.035090909 + }, + + { + "index_name": "b", + "selectivity_from_index": 1 + } + ] +] +drop table t1; +set optimizer_trace='enabled=off'; diff --git a/mysql-test/main/opt_trace_selectivity.test b/mysql-test/main/opt_trace_selectivity.test new file mode 100644 index 00000000000..4d59d4974cd --- /dev/null +++ b/mysql-test/main/opt_trace_selectivity.test @@ -0,0 +1,52 @@ +--source include/have_sequence.inc +--source include/not_embedded.inc + +# +# Test changes in calculate_cond_selectivity_for_table() +# +create or replace table t1 (a int, b 
int, c int, key(a,c), key(b,c), key (c,b)) engine=aria; +insert into t1 select seq/100+1, mod(seq,10), mod(seq,15) from seq_1_to_10000; +insert into t1 select seq/100+1, mod(seq,10), 10 from seq_1_to_1000; +optimize table t1; + +select count(*) from t1 where a=2; +select count(*) from t1 where b=5; +select count(*) from t1 where c=5; +select count(*) from t1 where c=10; +select count(*) from t1 where a=2 and b=5; +select count(*) from t1 where c=10 and b=5; +select count(*) from t1 where c=5 and b=5; + +set optimizer_trace="enabled=on"; +select count(*) from t1 where a=2 and b=5 and c=10; +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +select count(*) from t1 where a=2 and b=5 and c=5; +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # Ensure that we only use selectivity from non used index for simple cases + + +select count(*) from t1 where (a=2 and b= 5); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +--echo # All of the following should have selectivity=1 for index 'b' +select count(*) from t1 where (a=2 and b between 0 and 100); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select count(*) from t1 where (a in (2,3) and b between 0 and 100); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select count(*) from t1 where (a>2 and b between 0 and 100); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select count(*) from t1 where (a>=2 and b between 0 and 100); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select count(*) from t1 where (a<=2 and b between 0 and 100); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select count(*) from t1 where (a<2 and b between 0 and 100); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select count(*) from t1 where (a between 2 and 3 and b between 0 and 100); +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.selectivity_for_indexes')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +drop table t1; +set optimizer_trace='enabled=off'; diff --git a/mysql-test/main/rowid_filter_innodb.result b/mysql-test/main/rowid_filter_innodb.result index 90a119aa8c8..2c804079a9d 100644 --- a/mysql-test/main/rowid_filter_innodb.result +++ b/mysql-test/main/rowid_filter_innodb.result @@ -2449,7 +2449,7 @@ id y x explain extended select * from t1 join t2 on t1.id = t2.x where t2.y = 2 and t1.id = 1; id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t1 const PRIMARY PRIMARY 4 const 1 100.00 Using index -1 SIMPLE t2 ref x,y y 5 const 2 100.00 Using where +1 SIMPLE t2 ref x,y y 5 const 2 83.33 Using where Warnings: Note 1003 select 1 AS `id`,`test`.`t2`.`y` AS `y`,`test`.`t2`.`x` AS `x` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`y` = 2 and `test`.`t2`.`x` = 1 drop table t1, t2; diff --git 
a/sql/opt_range.cc b/sql/opt_range.cc index b4bd95c7719..108e4ddb129 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2749,7 +2749,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, KEY_PART *key_parts; KEY *key_info; PARAM param; - bool force_group_by = false; + bool force_group_by= false, group_by_optimization_used= false; if (check_stack_overrun(thd, 2*STACK_MIN_SIZE + sizeof(PARAM), buff)) DBUG_RETURN(0); // Fatal error flag is set @@ -3040,8 +3040,14 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, restore_nonrange_trees(¶m, tree, backup_keys); if ((group_trp= get_best_group_min_max(¶m, tree, read_time))) { - param.table->opt_range_condition_rows= MY_MIN(group_trp->records, - table_records); + /* mark that we are changing opt_range_condition_rows */ + group_by_optimization_used= 1; + set_if_smaller(param.table->opt_range_condition_rows, group_trp->records); + DBUG_PRINT("info", ("table_rows: %llu opt_range_condition_rows: %llu " + "group_trp->records: %ull", + table_records, param.table->opt_range_condition_rows, + group_trp->records)); + Json_writer_object grp_summary(thd, "best_group_range_summary"); if (unlikely(thd->trace_started())) @@ -3071,6 +3077,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, delete quick; quick= NULL; } + else + quick->group_by_optimization_used= group_by_optimization_used; } possible_keys= param.possible_keys; @@ -3316,12 +3324,12 @@ double records_in_column_ranges(PARAM *param, uint idx, */ static -int cmp_quick_ranges(TABLE *table, uint *a, uint *b) +int cmp_quick_ranges(TABLE::OPT_RANGE **a, TABLE::OPT_RANGE **b) { - int tmp= CMP_NUM(table->opt_range[*a].rows, table->opt_range[*b].rows); + int tmp=CMP_NUM((*a)->rows, (*b)->rows); if (tmp) return tmp; - return -CMP_NUM(table->opt_range[*a].key_parts, table->opt_range[*b].key_parts); + return -CMP_NUM((*a)->key_parts, (*b)->key_parts); } @@ -3366,14 +3374,14 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) { uint keynr, range_index, ranges; MY_BITMAP *used_fields= &table->cond_set; - double table_records= (double)table->stat_records(); - uint optimal_key_order[MAX_KEY]; + double table_records= (double)table->stat_records(), original_selectivity; + TABLE::OPT_RANGE *optimal_key_order[MAX_KEY]; MY_BITMAP handled_columns; my_bitmap_map *buf; QUICK_SELECT_I *quick; DBUG_ENTER("calculate_cond_selectivity_for_table"); - table->cond_selectivity= 1.0; + table->set_cond_selectivity(1.0); if (table_records == 0) DBUG_RETURN(FALSE); @@ -3381,7 +3389,10 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) if ((quick=table->reginfo.join_tab->quick) && quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) { - table->cond_selectivity*= (quick->records/table_records); + DBUG_ASSERT(table->opt_range_condition_rows <= quick->records); + table->set_cond_selectivity(MY_MIN(quick->records, + table->opt_range_condition_rows)/ + table_records); DBUG_RETURN(FALSE); } @@ -3413,93 +3424,135 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) */ for (ranges= keynr= 0 ; keynr < table->s->keys; keynr++) if (table->opt_range_keys.is_set(keynr)) - optimal_key_order[ranges++]= keynr; + optimal_key_order[ranges++]= table->opt_range + keynr; - my_qsort2(optimal_key_order, ranges, - sizeof(optimal_key_order[0]), - (qsort2_cmp) cmp_quick_ranges, table); + my_qsort(optimal_key_order, ranges, + sizeof(optimal_key_order[0]), + (qsort_cmp) cmp_quick_ranges); for (range_index= 0 ; 
range_index < ranges ; range_index++) { - uint keynr= optimal_key_order[range_index]; + TABLE::OPT_RANGE *range= optimal_key_order[range_index]; + uint keynr= (uint)(range - table->opt_range); + uint i; + uint used_key_parts= range->key_parts; + double quick_cond_selectivity= (range->rows / table_records); + KEY *key_info= table->key_info + keynr; + KEY_PART_INFO* key_part= key_info->key_part; + DBUG_ASSERT(quick_cond_selectivity <= 1.0); + + /* + Suppose, there are range conditions on these keys + KEY1 (col1, col2) + KEY2 (col2, col6) + KEY3 (col3, col2) + KEY4 (col4, col5) + + We don't want to count selectivity for ranges that uses a column + that was used before. + If the first column of an index was not used before, we can use the + key part statistics to calculate selectivity for this column. We cannot + calculate statistics for any other columns as the key part statistics + is also depending on the values of the previous key parts and not only + the last key part. + + In other words, if KEY1 has the smallest range, we will only use first + part of KEY3 and range of KEY4 to calculate selectivity. + */ + for (i= 0; i < used_key_parts; i++) { + if (bitmap_is_set(&handled_columns, key_part[i].fieldnr-1)) { - uint i; - uint used_key_parts= table->opt_range[keynr].key_parts; - double quick_cond_selectivity= (table->opt_range[keynr].rows / - table_records); - KEY *key_info= table->key_info + keynr; - KEY_PART_INFO* key_part= key_info->key_part; - /* - Suppose, there are range conditions on two keys - KEY1 (col1, col2) - KEY2 (col3, col2) - - we don't want to count selectivity of condition on col2 twice. - - First, find the longest key prefix that's made of columns whose - selectivity wasn't already accounted for. - */ - for (i= 0; i < used_key_parts; i++, key_part++) - { - if (bitmap_is_set(&handled_columns, key_part->fieldnr-1)) - break; - bitmap_set_bit(&handled_columns, key_part->fieldnr-1); - } - if (i) + double rec_per_key; + if (!i) { - double UNINIT_VAR(selectivity_mult); - - /* - There is at least 1-column prefix of columns whose selectivity has - not yet been accounted for. - */ - table->cond_selectivity*= quick_cond_selectivity; - Json_writer_object selectivity_for_index(thd); - selectivity_for_index.add("index_name", key_info->name) - .add("selectivity_from_index", - quick_cond_selectivity); - if (i != used_key_parts) - { - /* - Range access got us estimate for #used_key_parts. - We need estimate for #(i-1) key parts. - */ - double f1= key_info->actual_rec_per_key(i-1); - double f2= key_info->actual_rec_per_key(i); - if (f1 > 0 && f2 > 0) - selectivity_mult= f1 / f2; - else - { - /* - No statistics available, assume the selectivity is proportional - to the number of key parts. - (i=0 means 1 keypart, i=1 means 2 keyparts, so use i+1) - */ - selectivity_mult= ((double)(i+1)) / i; - } - table->cond_selectivity*= selectivity_mult; - selectivity_for_index.add("selectivity_multiplier", - selectivity_mult); - } /* - We need to set selectivity for fields supported by indexes. - For single-component indexes and for some first components - of other indexes we do it here. For the remaining fields - we do it later in this function, in the same way as for the - fields not used in any indexes. 
- */ - if (i == 1) - { - uint fieldnr= key_info->key_part[0].fieldnr; - table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity; - if (i != used_key_parts) - table->field[fieldnr-1]->cond_selectivity*= selectivity_mult; - bitmap_clear_bit(used_fields, fieldnr-1); - } + We cannot use this key part for selectivity calculation as + key_info->actual_rec_per_key for later keys are depending on the + distribution of the previous key parts. + */ + goto end_of_range_loop; } + /* + A later key part was already used. We can still use key + statistics for the first key part to get some approximation + of the selectivity of this key. This can be done if the + first key part is a constant: + WHERE key1_part1=1 and key2_part1=5 and key2_part2 BETWEEN 0 and 10 + Even if key1 is used and it also includes the field for key2_part1 + as a key part, we can still use selectivity for key2_part1 + */ + if ((rec_per_key= key_info->actual_rec_per_key(0)) == 0.0 || + !range->first_key_part_has_only_one_value) + goto end_of_range_loop; + /* + Use key distribution statistics, except if range selectivity + is bigger. This can happen if the used key value has more + than an average number of instances. + */ + set_if_smaller(rec_per_key, rows2double(table->file->stats.records)); + set_if_bigger(quick_cond_selectivity, + rec_per_key / table->file->stats.records); + used_key_parts= 1; + break; } } + /* Set bits only after we have checked the used columns */ + for (i= 0; i < used_key_parts; i++, key_part++) + bitmap_set_bit(&handled_columns, key_part->fieldnr-1); + + /* + There is at least 1-column prefix of columns whose selectivity has + not yet been accounted for. + */ + table->multiply_cond_selectivity(quick_cond_selectivity); + + { + Json_writer_object selectivity_for_index(thd); + selectivity_for_index.add("index_name", key_info->name) + .add("selectivity_from_index", + quick_cond_selectivity); + } + /* + We need to set selectivity for fields supported by indexes. + For single-component indexes and for some first components + of other indexes we do it here. For the remaining fields + we do it later in this function, in the same way as for the + fields not used in any indexes. + */ + if (used_key_parts == 1) + { + uint fieldnr= key_info->key_part[0].fieldnr; + table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity; + DBUG_ASSERT(table->field[fieldnr-1]->cond_selectivity <= 1.0); + /* + Reset bit in used_fields to ensure this field is ignored in the loop + below. + */ + bitmap_clear_bit(used_fields, fieldnr-1); + } +end_of_range_loop: + continue; + } + /* + Take into account number of matching rows calculated by + get_best_ror_intersect() stored in table->opt_range_condition_rows + Use the smaller found selectivity. 
+ */ + original_selectivity= (table->opt_range_condition_rows / + table_records); + if (original_selectivity < table->cond_selectivity) + { + DBUG_ASSERT(quick && + (quick->group_by_optimization_used || + quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT || + quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)); + Json_writer_object selectivity_for_index(thd); + table->cond_selectivity= original_selectivity; + selectivity_for_index.add("use_opt_range_condition_rows_selectivity", + original_selectivity); } selectivity_for_indexes.end(); @@ -3582,6 +3635,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) if (rows != DBL_MAX) { key->field->cond_selectivity= rows/table_records; + DBUG_ASSERT(key->field->cond_selectivity <= 1.0); selectivity_for_column.add("selectivity_from_histogram", key->field->cond_selectivity); } @@ -3596,7 +3650,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) table_field->cond_selectivity < 1.0) { if (!bitmap_is_set(&handled_columns, table_field->field_index)) - table->cond_selectivity*= table_field->cond_selectivity; + table->multiply_cond_selectivity(table_field->cond_selectivity); } } @@ -3608,12 +3662,6 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) } selectivity_for_columns.end(); - if (quick && (quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_UNION || - quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)) - { - table->cond_selectivity*= (quick->records/table_records); - } - bitmap_union(used_fields, &handled_columns); /* Check if we can improve selectivity estimates by using sampling */ @@ -3651,7 +3699,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) DBUG_PRINT("info", ("The predicate selectivity : %g", (double)stat->positive / examined_rows)); double selectivity= ((double)stat->positive) / examined_rows; - table->cond_selectivity*= selectivity; + table->multiply_cond_selectivity(selectivity); /* If a field is involved then we register its selectivity in case there in an equality with the field. @@ -11519,6 +11567,26 @@ void SEL_ARG::test_use_count(SEL_ARG *root) } #endif + +/** + Check if first key part has only one value + + @retval 1 yes + @retval 0 no +*/ + +static bool check_if_first_key_part_has_only_one_value(SEL_ARG *arg) +{ + if (arg->left != &null_element || arg->right != &null_element) + return 0; // Multiple key values + if ((arg->min_flag | arg->max_flag) & (NEAR_MIN | NEAR_MAX)) + return 0; + if (unlikely(arg->type != SEL_ARG::KEY_RANGE)) // Not a valid range + return 0; + return arg->min_value == arg->max_value || !arg->cmp_min_to_max(arg); +} + + /* Calculate cost and E(#rows) for a given index and intervals tree @@ -11640,6 +11708,8 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, range->index_only_cost= 0; else range->index_only_cost= cost->index_only_cost(); + range->first_key_part_has_only_one_value= + check_if_first_key_part_has_only_one_value(tree); } } @@ -11672,6 +11742,8 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, *is_ror_scan= seq.is_ror_scan; DBUG_PRINT("exit", ("Records: %lu", (ulong) rows)); + DBUG_ASSERT(rows == HA_POS_ERROR || + rows <= MY_MAX(param->table->stat_records(), 1)); DBUG_RETURN(rows); //psergey-merge:todo: maintain first_null_comp. 
} diff --git a/sql/opt_range.h b/sql/opt_range.h index 1f4d3f1cd1f..e5782248c56 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1109,6 +1109,13 @@ public: */ uint used_key_parts; + /* + Set to 1 if we used group by optimization to calculate number of rows + in the result, stored in table->opt_range_condition_rows. + This is only used for asserts. + */ + bool group_by_optimization_used; + QUICK_SELECT_I(); virtual ~QUICK_SELECT_I(){}; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 9ee0d7d2b14..0ac458e6ba3 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5810,7 +5810,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, get_delayed_table_estimates(s->table, &s->records, &s->read_time, &s->startup_cost); s->found_records= s->records; - table->opt_range_condition_rows=s->records; + s->table->opt_range_condition_rows= s->records; + s->table->used_stat_records= s->records; } else s->scan_time(); @@ -5834,8 +5835,12 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, */ add_group_and_distinct_keys(join, s); - s->table->cond_selectivity= 1.0; - + /* This will be updated in calculate_cond_selectivity_for_table() */ + s->table->set_cond_selectivity(1.0); + DBUG_ASSERT(s->table->used_stat_records == 0 || + s->table->cond_selectivity <= + s->table->opt_range_condition_rows / + s->table->used_stat_records); /* Perform range analysis if there are keys it could use (1). Don't do range analysis for materialized subqueries (2). @@ -7716,6 +7721,15 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint, TABLE *table= s->table; double sel= table->cond_selectivity; double table_records= rows2double(s->records); + DBUG_ASSERT(sel >= 0 && sel <= 1.0); + /* + table->cond_selectivity will include data from opt_range. + Here we check that this is indeeded the case. + Note that if table_records == 0, then 'sel' is probably 1 + */ + DBUG_ASSERT(table_records == 0 || + sel <= s->table->opt_range_condition_rows / + table_records); dbl_records= table_records * sel; return dbl_records; } @@ -9829,7 +9843,11 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, However if sel becomes greater than 2 then with high probability something went wrong. */ - sel /= (double)table->opt_range[key].rows / (double) table->stat_records(); + DBUG_ASSERT(sel <= 1.0); + DBUG_ASSERT(table->opt_range[key].rows <= + (double) table->stat_records()); + sel /= ((double) table->opt_range[key].rows / + (double) table->stat_records()); set_if_smaller(sel, 1.0); used_range_selectivity= true; } @@ -9973,6 +9991,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, exit: if (ref_keyuse_steps != ref_keyuse_steps_buf) my_free(ref_keyuse_steps); + DBUG_ASSERT(sel >= 0.0 and sel <= 1.0); return sel; } @@ -11403,6 +11422,7 @@ bool JOIN::get_best_combination() */ j->records_read= best_positions[tablenr].records_read; j->cond_selectivity= best_positions[tablenr].cond_selectivity; + DBUG_ASSERT(j->cond_selectivity <= 1.0); map2table[j->table->tablenr]= j; /* If we've reached the end of sjm nest, switch back to main sequence */ @@ -27705,6 +27725,15 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, KEY *key_info= 0; uint key_len= 0; +#ifdef NOT_YET + /* + Would be good to keep this condition up to date. 
+ Another alternative is to remove JOIN_TAB::cond_selectivity and use + TABLE::cond_selectivity everywhere + */ + DBUG_ASSERT(cond_selectivity == table->cond_selectivity); +#endif + explain_plan= eta; eta->key.clear(); eta->quick_info= NULL; diff --git a/sql/table.h b/sql/table.h index 72db38a82f2..bb02a51678e 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1377,6 +1377,7 @@ public: index only access for it is stored in index_only_costs[i] */ double index_only_cost; + bool first_key_part_has_only_one_value; } *opt_range; /* Bitmaps of key parts that =const for the duration of join execution. If @@ -1810,7 +1811,18 @@ public: DBUG_ASSERT(s->period.name); return field[s->period.end_fieldno]; } - + void set_cond_selectivity(double selectivity) + { + DBUG_ASSERT(selectivity >= 0.0 && selectivity <= 1.0); + cond_selectivity= selectivity; + DBUG_PRINT("info", ("cond_selectivity: %g", cond_selectivity)); + } + void multiply_cond_selectivity(double selectivity) + { + DBUG_ASSERT(selectivity >= 0.0 && selectivity <= 1.0); + cond_selectivity*= selectivity; + DBUG_PRINT("info", ("cond_selectivity: %g", cond_selectivity)); + } ulonglong vers_start_id() const; ulonglong vers_end_id() const; |