diff options
author | Monty <monty@mariadb.org> | 2021-10-08 02:36:58 +0300 |
---|---|---|
committer | Monty <monty@mariadb.org> | 2022-07-07 19:51:43 +0300 |
commit | 4c1ca5d30107c0f0031f96aa5aeea58f23110b88 (patch) | |
tree | 354546c81a307432eb231dae325ef4dd9226235e | |
parent | 4d587b440c062231d30c4d30397a6621f11dc8f4 (diff) | |
download | mariadb-git-4c1ca5d30107c0f0031f96aa5aeea58f23110b88.tar.gz |
Adjust costs for doing index scan in cost_group_min_max()
The idea is that when doing a tree dive (once per group), we need to
compare key values, which is fast. For each new group, we have to
compare the full where clause for the row.
Compared to original code, the cost of group_min_max() has slightly
increased which affects some test with only a few rows.
main.group_min_max and main.distinct have been modified to show the
effect of the change.
The patch also adjust the number of groups in case of quick selects:
- For simple WHERE clauses, ensure that we have at least as many groups
as we have conditions on the used group-by key parts.
The assumption is that each condition will create at least one group.
- Ensure that there are no more groups than rows found by quick_select
Test changes:
- For some small tables there has been a change of
Using index for group-by -> Using index for group-by (scanning)
Range -> Index and Using index for group-by -> Using index
-rw-r--r-- | mysql-test/main/derived_cond_pushdown.result | 6 | ||||
-rw-r--r-- | mysql-test/main/distinct.result | 14 | ||||
-rw-r--r-- | mysql-test/main/distinct.test | 6 | ||||
-rw-r--r-- | mysql-test/main/group_by.result | 13 | ||||
-rw-r--r-- | mysql-test/main/group_by.test | 7 | ||||
-rw-r--r-- | mysql-test/main/group_min_max.result | 44 | ||||
-rw-r--r-- | mysql-test/main/group_min_max.test | 22 | ||||
-rw-r--r-- | mysql-test/main/group_min_max_innodb.result | 6 | ||||
-rw-r--r-- | mysql-test/main/opt_trace.result | 121 | ||||
-rw-r--r-- | mysql-test/main/partition_range.result | 7 | ||||
-rw-r--r-- | mysql-test/main/partition_range.test | 1 | ||||
-rw-r--r-- | mysql-test/main/subselect_mat.result | 6 | ||||
-rw-r--r-- | mysql-test/main/subselect_sj_mat.result | 6 | ||||
-rw-r--r-- | mysql-test/suite/gcol/r/gcol_select_innodb.result | 2 | ||||
-rw-r--r-- | mysql-test/suite/gcol/r/gcol_select_myisam.result | 2 | ||||
-rw-r--r-- | mysql-test/suite/vcol/r/vcol_select_innodb.result | 2 | ||||
-rw-r--r-- | mysql-test/suite/vcol/r/vcol_select_myisam.result | 2 | ||||
-rw-r--r-- | sql/opt_range.cc | 55 |
18 files changed, 220 insertions, 102 deletions
diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result index 9059fa63fed..c3a0357f92e 100644 --- a/mysql-test/main/derived_cond_pushdown.result +++ b/mysql-test/main/derived_cond_pushdown.result @@ -17374,7 +17374,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY <subquery3> eq_ref distinct_key distinct_key 4 func 1 1 PRIMARY <derived2> ref key0 key0 5 test.t3.id 2 3 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 -2 DERIVED cp2 range NULL a 5 NULL 7 Using index for group-by +2 DERIVED cp2 index NULL a 5 NULL 7 Using index explain format=json select * from t1, (select a from t1 cp2 group by a) dt, t3 where dt.a = t1.a and t1.a = t3.id and t1.a in (select a from t2); EXPLAIN @@ -17437,13 +17437,13 @@ EXPLAIN "select_id": 2, "table": { "table_name": "cp2", - "access_type": "range", + "access_type": "index", "key": "a", "key_length": "5", "used_key_parts": ["a"], "rows": 7, "filtered": 100, - "using_index_for_group_by": true + "using_index": true } } } diff --git a/mysql-test/main/distinct.result b/mysql-test/main/distinct.result index eb7f3923042..f48b2812972 100644 --- a/mysql-test/main/distinct.result +++ b/mysql-test/main/distinct.result @@ -538,10 +538,10 @@ PRIMARY KEY (a,b)); INSERT INTO t2 VALUES (1,1,1,50), (1,2,3,40), (2,1,3,4); EXPLAIN SELECT DISTINCT a FROM t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range NULL PRIMARY 4 NULL 3 Using index for group-by +1 SIMPLE t2 index NULL PRIMARY 8 NULL 3 Using index EXPLAIN SELECT DISTINCT a,a FROM t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range NULL PRIMARY 4 NULL 3 Using index for group-by +1 SIMPLE t2 index NULL PRIMARY 8 NULL 3 Using index EXPLAIN SELECT DISTINCT b,a FROM t2; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 index NULL PRIMARY 8 NULL 3 Using index @@ -754,9 +754,6 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1), (1, 2, 1), (1, 2, 2), (1, 2, 3); -EXPLAIN SELECT DISTINCT a, b, d, c FROM t1; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL PRIMARY 16 NULL 6 Using index for group-by; Using temporary SELECT DISTINCT a, b, d, c FROM t1; a b d c 1 1 0 1 @@ -765,6 +762,13 @@ a b d c 1 2 0 1 1 2 0 2 1 2 0 3 +EXPLAIN SELECT DISTINCT a, b, d, c FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 16 NULL 6 Using index +INSERT INTO t1 SELECT seq/10,seq/10,seq/10,seq/10,seq from seq_1_to_100; +EXPLAIN SELECT DISTINCT a, b, d, c FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range NULL PRIMARY 16 NULL 10 Using index for group-by; Using temporary DROP TABLE t1; # # Bug #46159: simple query that never returns diff --git a/mysql-test/main/distinct.test b/mysql-test/main/distinct.test index 32e189da98a..2f10d866560 100644 --- a/mysql-test/main/distinct.test +++ b/mysql-test/main/distinct.test @@ -4,6 +4,7 @@ # --source include/default_optimizer_switch.inc +--source include/have_sequence.inc --disable_warnings drop table if exists t1,t2,t3; --enable_warnings @@ -574,9 +575,10 @@ INSERT INTO t1(a, b, c) VALUES (1, 1, 1), (1, 2, 2), (1, 2, 3); -EXPLAIN SELECT DISTINCT a, b, d, c FROM t1; - SELECT DISTINCT a, b, d, c FROM t1; +EXPLAIN SELECT DISTINCT a, b, d, c FROM t1; +INSERT INTO t1 SELECT seq/10,seq/10,seq/10,seq/10,seq from seq_1_to_100; +EXPLAIN SELECT DISTINCT a, b, d, c FROM t1; DROP TABLE t1; diff --git a/mysql-test/main/group_by.result b/mysql-test/main/group_by.result index f3b55a1e06a..d0273fe2090 100644 --- a/mysql-test/main/group_by.result +++ b/mysql-test/main/group_by.result @@ -1578,7 +1578,7 @@ id select_type table type possible_keys key key_len ref rows Extra EXPLAIN SELECT a FROM t1 FORCE INDEX FOR JOIN (i2) FORCE INDEX FOR GROUP BY (i2) GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL i2 4 NULL 144 Using index for group-by +1 SIMPLE t1 index NULL i2 9 NULL 144 Using index EXPLAIN SELECT a FROM t1 USE INDEX () IGNORE INDEX (i2); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 144 @@ -1701,7 +1701,7 @@ NULL 1 1 2 EXPLAIN SELECT a from t2 GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range NULL a 5 NULL 6 Using index for group-by +1 SIMPLE t2 index NULL a 10 NULL 6 Using index SELECT a from t2 GROUP BY a; a NULL @@ -1714,6 +1714,11 @@ b NULL 1 2 +insert into t2 SELECT NULL, NULL from seq_1_to_10; +# Expect: Using index for group-by +EXPLAIN SELECT b from t2 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range NULL a 5 NULL 9 Using index for group-by DROP TABLE t1; DROP TABLE t2; CREATE TABLE t1 ( a INT, b INT ); @@ -2452,7 +2457,7 @@ test.t1 analyze status OK EXPLAIN SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range b b 9 NULL 2 Using where; Using index for group-by; Using temporary +1 SIMPLE t1 ref b b 4 const 6 Using where; Using index; Using temporary SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b; MIN(a) b @@ -2460,7 +2465,7 @@ MIN(a) b EXPLAIN SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range b b 9 NULL 2 Using where; Using index for group-by +1 SIMPLE t1 ref b b 4 const 6 Using where; Using index SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b; MIN(a) b diff --git a/mysql-test/main/group_by.test b/mysql-test/main/group_by.test index 97209b50cda..9de6eb1d2e7 100644 --- a/mysql-test/main/group_by.test +++ b/mysql-test/main/group_by.test @@ -1,3 +1,5 @@ +--source include/have_sequence.inc + # Initialise --disable_warnings drop table if exists t1,t2,t3; @@ -1136,6 +1138,11 @@ SELECT a from t2 GROUP BY a; EXPLAIN SELECT b from t2 GROUP BY b; SELECT b from t2 GROUP BY b; +# Show that we are using 'range' when there is more NULL rows in the table +insert into t2 SELECT NULL, NULL from seq_1_to_10; +--echo # Expect: Using index for group-by +EXPLAIN SELECT b from t2 GROUP BY a; + DROP TABLE t1; DROP TABLE t2; diff --git a/mysql-test/main/group_min_max.result b/mysql-test/main/group_min_max.result index ead3dea7145..5ab0f950aa7 100644 --- a/mysql-test/main/group_min_max.result +++ b/mysql-test/main/group_min_max.result @@ -2353,6 +2353,9 @@ t1; id2 id3 id5 id4 id3 id6 id5 id1 1 1 1 1 1 1 1 1 DROP TABLE t1,t2,t3,t4,t5,t6; +# +# Bug#22342: No results returned for query using max and group by +# CREATE TABLE t1 (a int, b int, KEY (a,b), KEY b (b)); INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3), @@ -2361,17 +2364,28 @@ ANALYZE TABLE t1; Table Op Msg_type Msg_text test.t1 analyze status Engine-independent statistics collected test.t1 analyze status OK +CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)); +INSERT INTO t2 SELECT a,b,b FROM t1; explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a,b a 10 NULL 2 Using where; Using index for group-by +1 SIMPLE t1 range a,b a 10 NULL 6 Using where; Using index +insert into t1 select 1,seq from seq_1_to_100; +explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b a 10 NULL 6 Using where; Using index +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a,b a 10 NULL 1 Using where; Using index for group-by SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; MAX(b) a 1 1 SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a; MIN(b) a 2 1 -CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)); -INSERT INTO t2 SELECT a,b,b FROM t1; explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 range PRIMARY PRIMARY 12 NULL 1 Using where; Using index @@ -3657,14 +3671,34 @@ KEY (f1,f2) ) ; insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'), (3, 'A'), (3, 'B'), (3, 'C'), (3, 'D'); +explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range NULL f1 5 NULL 8 Using index for group-by (scanning) SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; f1 COUNT(DISTINCT f2) 1 3 2 1 3 4 +insert into t1 select seq/10,char(64+mod(seq,4)) from seq_1_to_100; +explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range NULL f1 5 NULL 10 Using index for group-by +SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +f1 COUNT(DISTINCT f2) +0 4 +1 4 +2 4 +3 5 +4 4 +5 4 +6 4 +7 4 +8 4 +9 4 +10 4 explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL f1 5 NULL 8 Using index for group-by +1 SIMPLE t1 range NULL f1 5 NULL 10 Using index for group-by drop table t1; # End of test#50539. # @@ -3760,7 +3794,7 @@ CREATE INDEX break_it ON t1 (a, b); EXPLAIN SELECT distinct a, b FROM t1 where a = '3' ORDER BY b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range break_it break_it 10 NULL 2 Using where; Using index for group-by; Using filesort +1 SIMPLE t1 ref break_it break_it 5 const 6 Using where; Using index; Using filesort SELECT distinct a, b FROM t1 where a = '3' ORDER BY b; a b 3 1 diff --git a/mysql-test/main/group_min_max.test b/mysql-test/main/group_min_max.test index ed65745a509..c2e9db1c7be 100644 --- a/mysql-test/main/group_min_max.test +++ b/mysql-test/main/group_min_max.test @@ -898,23 +898,27 @@ t1; DROP TABLE t1,t2,t3,t4,t5,t6; -# -# Bug#22342: No results returned for query using max and group by -# +--echo # +--echo # Bug#22342: No results returned for query using max and group by +--echo # CREATE TABLE t1 (a int, b int, KEY (a,b), KEY b (b)); INSERT INTO t1 VALUES (1,1),(1,2),(1,0),(1,3), (1,-1),(1,-2),(1,-3),(1,-4); ANALYZE TABLE t1; +CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)); +INSERT INTO t2 SELECT a,b,b FROM t1; explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +insert into t1 select 1,seq from seq_1_to_100; +explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; +analyze table t1; +explain SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; + SELECT MAX(b), a FROM t1 WHERE b < 2 AND a = 1 GROUP BY a; SELECT MIN(b), a FROM t1 WHERE b > 1 AND a = 1 GROUP BY a; -CREATE TABLE t2 (a int, b int, c int, PRIMARY KEY (a,b,c)); -INSERT INTO t2 SELECT a,b,b FROM t1; explain SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; SELECT MIN(c) FROM t2 WHERE b = 2 and a = 1 and c > 1 GROUP BY a; - DROP TABLE t1,t2; # @@ -1438,10 +1442,12 @@ CREATE TABLE t1 ( ) ; insert into t1 values(1,'A'),(1 , 'B'), (1, 'C'), (2, 'A'), (3, 'A'), (3, 'B'), (3, 'C'), (3, 'D'); - +explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; +insert into t1 select seq/10,char(64+mod(seq,4)) from seq_1_to_100; +explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; explain SELECT f1, COUNT(DISTINCT f2) FROM t1 GROUP BY f1; - drop table t1; --echo # End of test#50539. diff --git a/mysql-test/main/group_min_max_innodb.result b/mysql-test/main/group_min_max_innodb.result index 3586ad5237f..9671e835934 100644 --- a/mysql-test/main/group_min_max_innodb.result +++ b/mysql-test/main/group_min_max_innodb.result @@ -180,7 +180,7 @@ F 17 EXPLAIN SELECT c1, max(i2) FROM t1 WHERE (c1 = 'C' OR c1 = 'F' ) AND ( i2 = 17 ) GROUP BY c1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range k1 k1 5 NULL 1 Using where; Using index for group-by +1 SIMPLE t1 range k1 k1 5 NULL 2 Using where; Using index for group-by SELECT c1, max(i2) FROM t1 WHERE (c1 = 'C' OR c1 = 'F' ) AND ( i2 = 17 ) GROUP BY c1; c1 max(i2) @@ -200,7 +200,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2 WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ) GROUP BY c1,i1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range k2 k2 9 NULL 60 Using where; Using index for group-by +1 SIMPLE t2 range k2 k2 5 NULL 60 Using where; Using index SELECT c1, i1, max(i2) FROM t2 WHERE (c1 = 'C' OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 ) GROUP BY c1,i1; @@ -211,7 +211,7 @@ EXPLAIN SELECT c1, i1, max(i2) FROM t2 WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )) GROUP BY c1,i1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range k2 k2 9 NULL 60 Using where; Using index for group-by +1 SIMPLE t2 range k2 k2 5 NULL 60 Using where; Using index SELECT c1, i1, max(i2) FROM t2 WHERE (((c1 = 'C' AND i1 < 40) OR ( c1 = 'F' AND i1 < 35)) AND ( i2 = 17 )) GROUP BY c1,i1; diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 03fc852fb5a..de677e21386 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -1211,7 +1211,7 @@ EXPLAIN SELECT DISTINCT a FROM t1 { "index": "a", "covering": true, "rows": 5, - "cost": 6.25 + "cost": 6.5 } ] }, @@ -1223,7 +1223,7 @@ EXPLAIN SELECT DISTINCT a FROM t1 { "max_aggregate": false, "distinct_aggregate": false, "rows": 5, - "cost": 6.25, + "cost": 6.5, "key_parts_used_for_access": ["a"], "ranges": [], "chosen": true @@ -1237,12 +1237,12 @@ EXPLAIN SELECT DISTINCT a FROM t1 { "max_aggregate": false, "distinct_aggregate": false, "rows": 5, - "cost": 6.25, + "cost": 6.5, "key_parts_used_for_access": ["a"], "ranges": [] }, "rows_for_plan": 5, - "cost_for_plan": 6.25, + "cost_for_plan": 6.5, "chosen": true } } @@ -1259,19 +1259,19 @@ EXPLAIN SELECT DISTINCT a FROM t1 { { "access_type": "index_merge", "resulting_rows": 5, - "cost": 6.25, + "cost": 6.5, "chosen": true } ], "chosen_access_method": { "type": "index_merge", "records": 5, - "cost": 6.25, + "cost": 6.5, "uses_join_buffering": false } }, "rows_for_plan": 5, - "cost_for_plan": 7.25 + "cost_for_plan": 7.5 } ] }, @@ -1312,7 +1312,7 @@ test.t1 analyze status Engine-independent statistics collected test.t1 analyze status OK EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL a 20 NULL 7 Using where; Using index for group-by +1 SIMPLE t1 index NULL a 20 NULL 7 Using where; Using index select * from information_schema.OPTIMIZER_TRACE; QUERY TRACE MISSING_BYTES_BEYOND_MAX_MEM_SIZE INSUFFICIENT_PRIVILEGES EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { @@ -1400,7 +1400,7 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { "covering": true, "ranges": ["(2,3) <= (b,c) <= (2,3)"], "rows": 7, - "cost": 2.05 + "cost": 2.75 } ] }, @@ -1412,29 +1412,29 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { "max_aggregate": false, "distinct_aggregate": false, "rows": 7, - "cost": 2.05, + "cost": 2.75, "key_parts_used_for_access": ["a", "b", "c"], "ranges": ["(2,3) <= (b,c) <= (2,3)"], - "chosen": true - }, - "chosen_range_access_summary": { - "range_access_plan": { - "type": "index_group", - "index": "a", - "min_max_arg": "d", - "min_aggregate": true, - "max_aggregate": false, - "distinct_aggregate": false, - "rows": 7, - "cost": 2.05, - "key_parts_used_for_access": ["a", "b", "c"], - "ranges": ["(2,3) <= (b,c) <= (2,3)"] - }, - "rows_for_plan": 7, - "cost_for_plan": 2.05, - "chosen": true + "chosen": false, + "cause": "cost" } } + }, + { + "selectivity_for_indexes": [], + "selectivity_for_columns": [ + { + "column_name": "b", + "ranges": ["2 <= b <= 2"], + "selectivity_from_histogram": 0.2890625 + }, + { + "column_name": "c", + "ranges": ["3 <= c <= 3"], + "selectivity_from_histogram": 0.2890625 + } + ], + "cond_selectivity": 0.083557129 } ] }, @@ -1446,23 +1446,23 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { "best_access_path": { "considered_access_paths": [ { - "access_type": "index_merge", - "resulting_rows": 7, - "cost": 2.05, + "access_type": "scan", + "resulting_rows": 1, + "cost": 3.229052734, "chosen": true, "use_tmp_table": true } ], "chosen_access_method": { - "type": "index_merge", - "records": 7, - "cost": 2.05, + "type": "scan", + "records": 1, + "cost": 3.229052734, "uses_join_buffering": false } }, - "rows_for_plan": 7, - "cost_for_plan": 3.45, - "cost_for_sorting": 7 + "rows_for_plan": 1, + "cost_for_plan": 3.429052734, + "cost_for_sorting": 1 } ] }, @@ -1485,6 +1485,25 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { } ] } + }, + { + "reconsidering_access_paths_for_index_ordering": { + "clause": "GROUP BY", + "fanout": 1, + "read_time": 3.230052734, + "table": "t1", + "rows_estimation": 7, + "possible_keys": [ + { + "index": "a", + "can_resolve_order": true, + "updated_limit": 7, + "index_scan_time": 7, + "records": 7, + "chosen": true + } + ] + } } ] } @@ -1598,7 +1617,7 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { "covering": true, "ranges": ["(2001-01-04) <= (a)"], "rows": 9, - "cost": 2.35 + "cost": 3.25 } ] }, @@ -1610,7 +1629,7 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { "max_aggregate": true, "distinct_aggregate": false, "rows": 9, - "cost": 2.35, + "cost": 3.25, "key_parts_used_for_access": ["id"], "ranges": ["(2001-01-04) <= (a)"], "chosen": true @@ -1624,12 +1643,12 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { "max_aggregate": true, "distinct_aggregate": false, "rows": 9, - "cost": 2.35, + "cost": 3.25, "key_parts_used_for_access": ["id"], "ranges": ["(2001-01-04) <= (a)"] }, "rows_for_plan": 9, - "cost_for_plan": 2.35, + "cost_for_plan": 3.25, "chosen": true } } @@ -1646,7 +1665,7 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { { "access_type": "index_merge", "resulting_rows": 9, - "cost": 2.35, + "cost": 3.25, "chosen": true, "use_tmp_table": true } @@ -1654,12 +1673,12 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { "chosen_access_method": { "type": "index_merge", "records": 9, - "cost": 2.35, + "cost": 3.25, "uses_join_buffering": false } }, "rows_for_plan": 9, - "cost_for_plan": 4.15, + "cost_for_plan": 5.05, "cost_for_sorting": 9 } ] @@ -1785,7 +1804,7 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { "covering": true, "ranges": ["(2001-01-04) <= (a) <= (2001-01-04)"], "rows": 9, - "cost": 2.35 + "cost": 3.25 } ] }, @@ -1797,7 +1816,7 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { "max_aggregate": false, "distinct_aggregate": false, "rows": 9, - "cost": 2.35, + "cost": 3.25, "key_parts_used_for_access": ["id", "a"], "ranges": ["(2001-01-04) <= (a) <= (2001-01-04)"], "chosen": true @@ -1811,12 +1830,12 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { "max_aggregate": false, "distinct_aggregate": false, "rows": 9, - "cost": 2.35, + "cost": 3.25, "key_parts_used_for_access": ["id", "a"], "ranges": ["(2001-01-04) <= (a) <= (2001-01-04)"] }, "rows_for_plan": 9, - "cost_for_plan": 2.35, + "cost_for_plan": 3.25, "chosen": true } } @@ -1833,7 +1852,7 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { { "access_type": "index_merge", "resulting_rows": 9, - "cost": 2.35, + "cost": 3.25, "chosen": true, "use_tmp_table": true } @@ -1841,12 +1860,12 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { "chosen_access_method": { "type": "index_merge", "records": 9, - "cost": 2.35, + "cost": 3.25, "uses_join_buffering": false } }, "rows_for_plan": 9, - "cost_for_plan": 4.15, + "cost_for_plan": 5.05, "cost_for_sorting": 9 } ] diff --git a/mysql-test/main/partition_range.result b/mysql-test/main/partition_range.result index 7384def5c85..320a15fa5b5 100644 --- a/mysql-test/main/partition_range.result +++ b/mysql-test/main/partition_range.result @@ -4,13 +4,16 @@ drop table if exists t1, t2; # CREATE TABLE t1 (a INT,b INT,KEY a (a,b)); INSERT INTO `t1` VALUES (0,580092),(3000,894076),(4000,805483),(4000,913540),(6000,611137),(8000,171602),(9000,599495),(9000,746305),(10000,272829),(10000,847519),(12000,258869),(12000,929028),(13000,288970),(15000,20971),(15000,105839),(16000,788272),(17000,76914),(18000,827274),(19000,802258),(20000,123677),(20000,587729),(22000,701449),(25000,31565),(25000,230782),(25000,442887),(25000,733139),(25000,851020); +SELECT COUNT(*) from t1 where a IN (10000, 1000000, 3000); +COUNT(*) +3 EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 5 NULL 1 Using where; Using index for group-by +1 SIMPLE t1 range a a 5 NULL 4 Using where; Using index alter table t1 partition by hash(a) partitions 1; EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 5 NULL 1 Using where; Using index for group-by +1 SIMPLE t1 range a a 5 NULL 4 Using where; Using index alter table t1 remove partitioning; insert into t1 (a,b) select seq,seq from seq_4001_to_4100; insert into t1 (a,b) select seq,seq from seq_10001_to_10100; diff --git a/mysql-test/main/partition_range.test b/mysql-test/main/partition_range.test index f56851217cf..740cbcd7d7b 100644 --- a/mysql-test/main/partition_range.test +++ b/mysql-test/main/partition_range.test @@ -17,6 +17,7 @@ drop table if exists t1, t2; CREATE TABLE t1 (a INT,b INT,KEY a (a,b)); INSERT INTO `t1` VALUES (0,580092),(3000,894076),(4000,805483),(4000,913540),(6000,611137),(8000,171602),(9000,599495),(9000,746305),(10000,272829),(10000,847519),(12000,258869),(12000,929028),(13000,288970),(15000,20971),(15000,105839),(16000,788272),(17000,76914),(18000,827274),(19000,802258),(20000,123677),(20000,587729),(22000,701449),(25000,31565),(25000,230782),(25000,442887),(25000,733139),(25000,851020); +SELECT COUNT(*) from t1 where a IN (10000, 1000000, 3000); EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10000, 1000000, 3000) GROUP BY a; alter table t1 partition by hash(a) partitions 1; diff --git a/mysql-test/main/subselect_mat.result b/mysql-test/main/subselect_mat.result index 83213e0750d..647124e5581 100644 --- a/mysql-test/main/subselect_mat.result +++ b/mysql-test/main/subselect_mat.result @@ -1142,7 +1142,7 @@ a explain extended select a from t1 group by a having a in (select c from t2 where d >= 20); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by +1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index 2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`)))) @@ -1154,7 +1154,7 @@ create index iab on t1(a, b); explain extended select a from t1 group by a having a in (select c from t2 where d >= 20); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by +1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index 2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`)))) @@ -1166,7 +1166,7 @@ explain extended select a from t1 group by a having a in (select c from t2 where d >= some(select e from t3 where max(b)=e)); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 range NULL iab 4 NULL 7 100.00 Using index for group-by +1 PRIMARY t1 index NULL iab 8 NULL 7 100.00 Using index 2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 7 100.00 Using where 3 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 4 100.00 Using where Warnings: diff --git a/mysql-test/main/subselect_sj_mat.result b/mysql-test/main/subselect_sj_mat.result index 5ccdb385879..9dc74d28e26 100644 --- a/mysql-test/main/subselect_sj_mat.result +++ b/mysql-test/main/subselect_sj_mat.result @@ -1181,7 +1181,7 @@ a explain extended select a from t1 group by a having a in (select c from t2 where d >= 20); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by +1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index 2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`)))) @@ -1193,7 +1193,7 @@ create index iab on t1(a, b); explain extended select a from t1 group by a having a in (select c from t2 where d >= 20); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 range NULL it1a 4 NULL 7 100.00 Using index for group-by +1 PRIMARY t1 index NULL it1a 4 NULL 7 100.00 Using index 2 MATERIALIZED t2 ALL NULL NULL NULL NULL 7 100.00 Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a` from `test`.`t1` group by `test`.`t1`.`a` having <expr_cache><`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`a`,`test`.`t1`.`a` in ( <materialize> (/* select#2 */ select `test`.`t2`.`c` from `test`.`t2` where `test`.`t2`.`d` >= 20 ), <primary_index_lookup>(`test`.`t1`.`a` in <temporary table> on distinct_key where `test`.`t1`.`a` = `<subquery2>`.`c`)))) @@ -1205,7 +1205,7 @@ explain extended select a from t1 group by a having a in (select c from t2 where d >= some(select e from t3 where max(b)=e)); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 range NULL iab 4 NULL 7 100.00 Using index for group-by +1 PRIMARY t1 index NULL iab 8 NULL 7 100.00 Using index 2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 7 100.00 Using where 3 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 4 100.00 Using where Warnings: diff --git a/mysql-test/suite/gcol/r/gcol_select_innodb.result b/mysql-test/suite/gcol/r/gcol_select_innodb.result index 8ff8a8e3dd9..e5606663f29 100644 --- a/mysql-test/suite/gcol/r/gcol_select_innodb.result +++ b/mysql-test/suite/gcol/r/gcol_select_innodb.result @@ -146,7 +146,7 @@ count(distinct c) 3 explain select count(distinct c) from t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by +1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning) ### ### filesort & range-based utils ### diff --git a/mysql-test/suite/gcol/r/gcol_select_myisam.result b/mysql-test/suite/gcol/r/gcol_select_myisam.result index a29fc0a32ee..01b80ef48bc 100644 --- a/mysql-test/suite/gcol/r/gcol_select_myisam.result +++ b/mysql-test/suite/gcol/r/gcol_select_myisam.result @@ -146,7 +146,7 @@ count(distinct c) 3 explain select count(distinct c) from t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by +1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning) ### ### filesort & range-based utils ### diff --git a/mysql-test/suite/vcol/r/vcol_select_innodb.result b/mysql-test/suite/vcol/r/vcol_select_innodb.result index 57a17cbe468..0f1e1df2928 100644 --- a/mysql-test/suite/vcol/r/vcol_select_innodb.result +++ b/mysql-test/suite/vcol/r/vcol_select_innodb.result @@ -135,7 +135,7 @@ count(distinct c) 3 explain select count(distinct c) from t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by +1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning) ### ### filesort & range-based utils ### diff --git a/mysql-test/suite/vcol/r/vcol_select_myisam.result b/mysql-test/suite/vcol/r/vcol_select_myisam.result index d8271252137..301c9defb6d 100644 --- a/mysql-test/suite/vcol/r/vcol_select_myisam.result +++ b/mysql-test/suite/vcol/r/vcol_select_myisam.result @@ -133,7 +133,7 @@ count(distinct c) 3 explain select count(distinct c) from t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by +1 SIMPLE t1 range NULL c 5 NULL 5 Using index for group-by (scanning) ### ### filesort & range-based utils ### diff --git a/sql/opt_range.cc b/sql/opt_range.cc index e783f65786b..24624e15252 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -13686,6 +13686,12 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, The group-by list is a permutation of the select attributes, according to their order in the index. + EXAMPLES of handled queries + select max(keypart2) from t1 group by keypart1 + select max(keypart2) from t1 where keypart2 <= const group by keypart1 + select distinct keypart1 from table; + select count(distinct keypart1) from table; + TODO - What happens if the query groups by the MIN/MAX field, and there is no other field as in: "select MY_MIN(a) from t1 group by a" ? @@ -14767,7 +14773,22 @@ get_field_keypart(KEY *index, Field *field) This method computes the access cost of a TRP_GROUP_MIN_MAX instance and the number of rows returned. + The used algorithm used for finding the rows is: + + For each range (if no ranges, the range is the whole table) + Do an index search for the start of the range + As long as the found key is withing the range + If the found row matches the where clause, use the row otherwise skip it + Scan index for next group, jumping over all identical keys, done in + QUICK_GROUP_MIN_MAX_SELECT::next_prefix + If the engine does not support a native next_prefix, we will + either scan the index for the next value or do a new index dive + with 'find next bigger key'. + NOTES + See get_best_group_min_max() for which kind of queries this function + will be called. + The cost computation distinguishes several cases: 1) No equality predicates over non-group attributes (thus no key_infix). If groups are bigger than blocks on the average, then we assume that it @@ -14834,17 +14855,18 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, if (!group_key_parts) { /* Summary over the whole table */ - keys_per_group= table_records; + keys_per_group= MY_MAX(table_records,1); } else { keys_per_group= (ha_rows) index_info->actual_rec_per_key(group_key_parts - 1); + if (keys_per_group == 0) /* If there is no statistics try to guess */ + { + /* each group contains 10% of all records */ + keys_per_group= (table_records / 10) + 1; + } } - - if (keys_per_group == 0) /* If there is no statistics try to guess */ - /* each group contains 10% of all records */ - keys_per_group= (table_records / 10) + 1; num_groups= (table_records / keys_per_group) + 1; /* Apply the selectivity of the quick select for group prefixes. */ @@ -14853,7 +14875,21 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, quick_prefix_selectivity= (double) quick_prefix_records / (double) table_records; num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity); - set_if_bigger(num_groups, 1); + /* + Expect at least as many groups as there is ranges in the index + WHERE a in (1,2,3) GROUP BY a + + This is mostly relevant for queries with few records, which is + something we have a lot of in our test suites. + In theory it is possible to scan the index_tree and for cases + where all ranges are eq ranges, we could calculate the exact number + of groups. This is probably an overkill so for now we estimate + the lower level of number of groups by the range elements in the + tree. + */ + set_if_bigger(num_groups, MY_MAX(index_tree->elements, 1)); + /* There cannot be more groups than matched records */ + set_if_smaller(num_groups, quick_prefix_records); } /* Ensure we don't have more groups than rows in table */ set_if_smaller(num_groups, table_records); @@ -14890,21 +14926,22 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, group. To make the CPU cost reflect this, we estimate the CPU cost as the sum of: 1. Cost for evaluating the condition for each num_group - (1/TIME_FOR_COMPARE) (similarly as for index scan). + (1/TIME_FOR_COMPARE_IDX) (similarly as for index scan). 2. Cost for navigating the index structure (assuming a b-tree). Note: We only add the cost for one index comparision per block. For a b-tree the number of comparisons will be larger. However the cost is low as all of the upper level b-tree blocks should be in memory. TODO: This cost should be provided by the storage engine. + 3. Cost for comparing the row with the where clause */ const double tree_traversal_cost= ceil(log(static_cast<double>(table_records))/ log(static_cast<double>(keys_per_block))) * - 1/(2*TIME_FOR_COMPARE); + 1/(TIME_FOR_COMPARE_IDX); const double cpu_cost= (num_groups * - (tree_traversal_cost + 1/TIME_FOR_COMPARE_IDX)); + (tree_traversal_cost + 1/TIME_FOR_COMPARE)); *read_cost= io_cost + cpu_cost; *records= num_groups; |