diff options
-rw-r--r-- | include/my_base.h | 8 | ||||
-rw-r--r-- | include/my_bitmap.h | 2 | ||||
-rw-r--r-- | include/my_sys.h | 1 | ||||
-rw-r--r-- | innobase/include/row0mysql.h | 4 | ||||
-rw-r--r-- | innobase/row/row0sel.c | 31 | ||||
-rw-r--r-- | mysql-test/r/index_merge_ror.result | 168 | ||||
-rw-r--r-- | mysql-test/r/index_merge_ror_cpk.result | 93 | ||||
-rw-r--r-- | mysql-test/r/rowid_order_bdb.result | 186 | ||||
-rw-r--r-- | mysql-test/r/rowid_order_innodb.result | 186 | ||||
-rw-r--r-- | mysql-test/t/index_merge_ror.test | 215 | ||||
-rw-r--r-- | mysql-test/t/index_merge_ror_cpk.test | 81 | ||||
-rw-r--r-- | mysql-test/t/rowid_order_bdb.test | 108 | ||||
-rw-r--r-- | mysql-test/t/rowid_order_innodb.test | 108 | ||||
-rw-r--r-- | mysys/my_bit.c | 5 | ||||
-rw-r--r-- | mysys/my_bitmap.c | 56 | ||||
-rw-r--r-- | sql/ha_berkeley.cc | 25 | ||||
-rw-r--r-- | sql/ha_berkeley.h | 1 | ||||
-rw-r--r-- | sql/ha_heap.h | 7 | ||||
-rw-r--r-- | sql/ha_innodb.cc | 57 | ||||
-rw-r--r-- | sql/ha_innodb.h | 1 | ||||
-rw-r--r-- | sql/handler.h | 5 | ||||
-rw-r--r-- | sql/opt_range.cc | 2523 | ||||
-rw-r--r-- | sql/opt_range.h | 163 | ||||
-rw-r--r-- | sql/sql_delete.cc | 16 | ||||
-rw-r--r-- | sql/sql_select.cc | 76 | ||||
-rw-r--r-- | sql/sql_select.h | 2 | ||||
-rw-r--r-- | sql/sql_test.cc | 33 | ||||
-rw-r--r-- | sql/sql_update.cc | 41 |
28 files changed, 3711 insertions, 491 deletions
diff --git a/include/my_base.h b/include/my_base.h index 89b46de520f..19843d413c4 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -132,7 +132,13 @@ enum ha_extra_function { HA_EXTRA_RETRIEVE_ALL_COLS, HA_EXTRA_PREPARE_FOR_DELETE, HA_EXTRA_PREPARE_FOR_UPDATE, /* Remove read cache if problems */ - HA_EXTRA_PRELOAD_BUFFER_SIZE /* Set buffer size for preloading */ + HA_EXTRA_PRELOAD_BUFFER_SIZE, /* Set buffer size for preloading */ + /* + When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep + other fields intact. When this is off (by default) InnoDB will use memcpy + to overwrite entire row. + */ + HA_EXTRA_KEYREAD_PRESERVE_FIELDS }; /* The following is parameter to ha_panic() */ diff --git a/include/my_bitmap.h b/include/my_bitmap.h index 5b3da011f54..bb59cead092 100644 --- a/include/my_bitmap.h +++ b/include/my_bitmap.h @@ -46,6 +46,8 @@ extern my_bool bitmap_is_set(const MY_BITMAP *map, uint bitmap_bit); extern my_bool bitmap_is_set_all(const MY_BITMAP *map); extern my_bool bitmap_is_subset(const MY_BITMAP *map1, const MY_BITMAP *map2); extern uint bitmap_set_next(MY_BITMAP *map); +extern uint bitmap_get_first(const MY_BITMAP *map); +extern uint bitmap_bits_set(const MY_BITMAP *map); extern void bitmap_clear_all(MY_BITMAP *map); extern void bitmap_clear_bit(MY_BITMAP *map, uint bitmap_bit); extern void bitmap_free(MY_BITMAP *map); diff --git a/include/my_sys.h b/include/my_sys.h index d24bef11182..5e7108f552a 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -744,6 +744,7 @@ extern byte *my_compress_alloc(const byte *packet, ulong *len, ulong *complen); extern ha_checksum my_checksum(ha_checksum crc, const byte *mem, uint count); extern uint my_bit_log2(ulong value); extern uint my_count_bits(ulonglong v); +extern uint my_count_bits_ushort(ushort v); extern void my_sleep(ulong m_seconds); extern ulong crc32(ulong crc, const uchar *buf, uint len); diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index fade3709631..e81efc7e36b 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -552,6 +552,10 @@ struct row_prebuilt_struct { allocated mem buf start, because there is a 4 byte magic number at the start and at the end */ + ibool keep_other_fields_on_keyread; /* when using fetch + cache with HA_EXTRA_KEYREAD, don't + overwrite other fields in mysql row + row buffer.*/ ulint fetch_cache_first;/* position of the first not yet fetched row in fetch_cache */ ulint n_fetch_cached; /* number of not yet fetched rows diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 2215e3feff8..b4933a09628 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -2577,10 +2577,35 @@ row_sel_pop_cached_row_for_mysql( row */ row_prebuilt_t* prebuilt) /* in: prebuilt struct */ { - ut_ad(prebuilt->n_fetch_cached > 0); + ulint i; + mysql_row_templ_t* templ; + byte* cached_rec; + ut_ad(prebuilt->n_fetch_cached > 0); + + if (prebuilt->keep_other_fields_on_keyread) + { + /* Copy cache record field by field, don't touch fields that + are not covered by current key */ + cached_rec = + prebuilt->fetch_cache[prebuilt->fetch_cache_first]; + + for (i = 0; i < prebuilt->n_template; i++) { + templ = prebuilt->mysql_template + i; + ut_memcpy( + buf + templ->mysql_col_offset, + cached_rec + templ->mysql_col_offset, + templ->mysql_col_len); - ut_memcpy(buf, prebuilt->fetch_cache[prebuilt->fetch_cache_first], - prebuilt->mysql_row_len); + if (templ->mysql_null_bit_mask) + buf[templ->mysql_null_byte_offset] &= + cached_rec[templ->mysql_null_byte_offset]; + } + } + else + { + ut_memcpy(buf, prebuilt->fetch_cache[prebuilt->fetch_cache_first], + prebuilt->mysql_row_len); + } prebuilt->n_fetch_cached--; prebuilt->fetch_cache_first++; diff --git a/mysql-test/r/index_merge_ror.result b/mysql-test/r/index_merge_ror.result new file mode 100644 index 00000000000..94d03e8bf03 --- /dev/null +++ b/mysql-test/r/index_merge_ror.result @@ -0,0 +1,168 @@ +drop table if exists t1,t0; +select count(*) from t1; +count(*) +64801 +explain select key1,key2 from t1 where key1=100 and key2=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 3 Using where; Using index +select key1,key2 from t1 where key1=100 and key2=100; +key1 key2 +100 100 +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key1,key2,key3,key4 5,5,5,5 NULL 8 Using where +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +key1 key2 key3 key4 filler1 +100 100 100 100 key1-key2-key3-key4 +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, -1, -1, 'key1-key2'); +insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 100, 100, 'key4-key3'); +explain select key1,key2,filler1 from t1 where key1=100 and key2=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 3 Using where +select key1,key2,filler1 from t1 where key1=100 and key2=100; +key1 key2 filler1 +100 100 key1-key2-key3-key4 +100 100 key1-key2 +explain select key1,key2 from t1 where key1=100 and key2=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 3 Using where; Using index +select key1,key2 from t1 where key1=100 and key2=100; +key1 key2 +100 100 +100 100 +explain select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key1,key2,key3,key4 5,5,5,5 NULL 8 Using where +select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +key1 key2 key3 key4 +100 100 100 100 +100 100 -1 -1 +-1 -1 100 100 +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key1,key2,key3,key4 5,5,5,5 NULL 8 Using where +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +key1 key2 key3 key4 filler1 +100 100 100 100 key1-key2-key3-key4 +100 100 -1 -1 key1-key2 +-1 -1 100 100 key4-key3 +explain select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3 key1,key2,key3 5,5,5 NULL 1 Using where; Using index +select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100; +key1 key2 key3 +100 100 100 +insert into t1 (key1,key2,key3,key4,filler1) values (101,101,101,101, 'key1234-101'); +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3 key1,key2,key3 5,5,5 NULL 5 Using where +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101; +key1 key2 key3 key4 filler1 +100 100 100 100 key1-key2-key3-key4 +100 100 -1 -1 key1-key2 +101 101 101 101 key1234-101 +select key1,key2, filler1 from t1 where key1=100 and key2=100; +key1 key2 filler1 +100 100 key1-key2-key3-key4 +100 100 key1-key2 +update t1 set filler1='to be deleted' where key1=100 and key2=100; +update t1 set key1=200,key2=200 where key1=100 and key2=100; +delete from t1 where key1=200 and key2=200; +select key1,key2,filler1 from t1 where key2=100 and key2=200; +key1 key2 filler1 +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key1,key2,key3,key4 5,5,5,5 NULL 8 Using where +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +key1 key2 key3 key4 filler1 +-1 -1 100 100 key4-key3 +delete from t1 where key3=100 and key4=100; +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key1,key2,key3,key4 5,5,5,5 NULL 8 Using where +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +key1 key2 key3 key4 filler1 +explain select key1,key2 from t1 where key1=100 and key2=100; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 3 Using where; Using index +select key1,key2 from t1 where key1=100 and key2=100; +key1 key2 +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-1'); +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-2'); +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-3'); +explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key3,key1,key2,key4 5,5,5,5 NULL 16 Using where +select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +key1 key2 key3 key4 filler1 +100 100 200 200 key1-key2-key3-key4-3 +100 100 200 200 key1-key2-key3-key4-2 +100 100 200 200 key1-key2-key3-key4-1 +insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, -1, 200,'key4'); +explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key3,key1,key2,key4 5,5,5,5 NULL 18 Using where +select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +key1 key2 key3 key4 filler1 +100 100 200 200 key1-key2-key3-key4-3 +100 100 200 200 key1-key2-key3-key4-2 +100 100 200 200 key1-key2-key3-key4-1 +-1 -1 -1 200 key4 +insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 200, -1,'key3'); +explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2,key3,key4 key3,key1,key2,key4 5,5,5,5 NULL 20 Using where +select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +key1 key2 key3 key4 filler1 +100 100 200 200 key1-key2-key3-key4-3 +100 100 200 200 key1-key2-key3-key4-2 +100 100 200 200 key1-key2-key3-key4-1 +-1 -1 -1 200 key4 +-1 -1 200 -1 key3 +explain select * from t1 where st_a=1 and st_b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b st_a,st_b 4,4 NULL 2508 Using where +explain select st_a,st_b from t1 where st_a=1 and st_b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b st_a,st_b 4,4 NULL 2508 Using where; Using index +explain select st_a from t1 ignore index (st_a) where st_a=1 and st_b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,stb_swt1a_2b,stb_swt1b,st_b st_b 4 const 14720 Using where +explain select * from t1 where st_a=1 and swt1a=1 and swt2a=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a sta_swt12a 12 const,const,const 958 Using where +explain select * from t1 where st_b=1 and swt1b=1 and swt2b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref stb_swt1a_2b,stb_swt1b,st_b stb_swt1b 8 const,const 3757 Using where +explain select * from t1 where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a,stb_swt1a_2b 12,12 NULL 42 Using where +explain select * from t1 ignore index (sta_swt21a, stb_swt1a_2b) +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,st_a,stb_swt1b,st_b sta_swt12a,stb_swt1b 12,8 NULL 42 Using where +explain select * from t1 ignore index (sta_swt21a, sta_swt12a, stb_swt1a_2b) +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt1a,sta_swt2a,st_a,stb_swt1b,st_b sta_swt1a,stb_swt1b 8,8 NULL 163 Using where +explain select * from t1 ignore index (sta_swt21a, sta_swt12a, stb_swt1a_2b, stb_swt1b) +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt1a,sta_swt2a,st_a,st_b sta_swt1a,st_b 8,4 NULL 640 Using where +explain select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a,stb_swt1a_2b 12,12 NULL 42 Using where +explain select * from t1 +where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt1a,stb_swt1b 8,8 NULL 163 Using where +explain select st_a from t1 +where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt1a,stb_swt1b 8,8 NULL 163 Using where; Using index +explain select st_a from t1 +where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt1a,stb_swt1b 8,8 NULL 163 Using where; Using index +drop table t0,t1; diff --git a/mysql-test/r/index_merge_ror_cpk.result b/mysql-test/r/index_merge_ror_cpk.result new file mode 100644 index 00000000000..10b4b74d3cd --- /dev/null +++ b/mysql-test/r/index_merge_ror_cpk.result @@ -0,0 +1,93 @@ +drop table if exists t1; +create table t1 +( +pk1 int not null, +pk2 int not null, +key1 int not null, +key2 int not null, +pktail1ok int not null, +pktail2ok int not null, +pktail3bad int not null, +pktail4bad int not null, +pktail5bad int not null, +pk2copy int not null, +badkey int not null, +filler1 char (200), +filler2 char (200), +key (key1), +key (key2), +/* keys with tails from CPK members */ +key (pktail1ok, pk1), +key (pktail2ok, pk1, pk2), +key (pktail3bad, pk2, pk1), +key (pktail4bad, pk1, pk2copy), +key (pktail5bad, pk1, pk2, pk2copy), +primary key (pk1, pk2) +) engine=innodb; +explain select * from t1 where pk1 = 1 and pk2 < 80 and key1=0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref PRIMARY,key1 PRIMARY 4 const 1 Using where +select * from t1 where pk1 = 1 and pk2 < 80 and key1=0; +pk1 pk2 key1 key2 pktail1ok pktail2ok pktail3bad pktail4bad pktail5bad pk2copy badkey filler1 filler2 +1 10 0 0 0 0 0 0 0 10 0 filler-data-10 filler2 +1 11 0 0 0 0 0 0 0 11 0 filler-data-11 filler2 +1 12 0 0 0 0 0 0 0 12 0 filler-data-12 filler2 +1 13 0 0 0 0 0 0 0 13 0 filler-data-13 filler2 +1 14 0 0 0 0 0 0 0 14 0 filler-data-14 filler2 +1 15 0 0 0 0 0 0 0 15 0 filler-data-15 filler2 +1 16 0 0 0 0 0 0 0 16 0 filler-data-16 filler2 +1 17 0 0 0 0 0 0 0 17 0 filler-data-17 filler2 +1 18 0 0 0 0 0 0 0 18 0 filler-data-18 filler2 +1 19 0 0 0 0 0 0 0 19 0 filler-data-19 filler2 +explain select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 intr(key1,key2) 4,4 NULL 1 Using where; Using index +select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1; +pk1 pk2 +95 50 +95 51 +95 52 +95 53 +95 54 +95 55 +95 56 +95 57 +95 58 +95 59 +explain select * from t1 where badkey=1 and key1=10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref key1 key1 4 const 101 Using where +explain select * from t1 where pk1 < 7500 and key1 = 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge PRIMARY,key1 intr(key1:PRIMARY) 4:4 NULL 38 Using where +explain select * from t1 where pktail1ok=1 and key1=10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,pktail1ok intr(key1,pktail1ok) 4,4 NULL 1 Using where +explain select * from t1 where pktail2ok=1 and key1=10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,pktail2ok intr(key1,pktail2ok) 4,4 NULL 1 Using where +explain select * from t1 where pktail3bad=1 and key1=10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref key1,pktail3bad pktail3bad 4 const 98 Using where +explain select * from t1 where pktail4bad=1 and key1=10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref key1,pktail4bad pktail4bad 4 const 99 Using where +explain select * from t1 where pktail5bad=1 and key1=10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref key1,pktail5bad pktail5bad 4 const 99 Using where +explain select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 intr(key1,key2) 4,4 NULL 1 Using where; Using index +select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10; +pk1 pk2 key1 key2 +95 50 10 10 +95 51 10 10 +95 52 10 10 +95 53 10 10 +95 54 10 10 +95 55 10 10 +95 56 10 10 +95 57 10 10 +95 58 10 10 +95 59 10 10 +drop table t1; diff --git a/mysql-test/r/rowid_order_bdb.result b/mysql-test/r/rowid_order_bdb.result new file mode 100644 index 00000000000..8f385dfc75f --- /dev/null +++ b/mysql-test/r/rowid_order_bdb.result @@ -0,0 +1,186 @@ +drop table if exists t1, t2, t3,t4; +create table t1 ( +pk1 int not NULL, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1), +KEY key1 (key1), +KEY key2 (key2) +) engine=bdb; +insert into t1 values (-5, 1, 1), +(-100, 1, 1), +(3, 1, 1), +(0, 1, 1), +(10, 1, 1); +explain select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 5 Using where +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 key1 key2 +-100 1 1 +-5 1 1 +0 1 1 +3 1 1 +10 1 1 +drop table t1; +create table t1 ( +pk1 int unsigned not NULL, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1), +KEY key1 (key1), +KEY key2 (key2) +) engine=bdb; +insert into t1 values (0, 1, 1), +(0xFFFFFFFF, 1, 1), +(0xFFFFFFFE, 1, 1), +(1, 1, 1), +(2, 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 key1 key2 +0 1 1 +1 1 1 +2 1 1 +4294967294 1 1 +4294967295 1 1 +drop table t1; +create table t1 ( +pk1 char(4) not NULL, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1), +KEY key1 (key1), +KEY key2 (key2) +) engine=bdb collate latin2_general_ci; +insert into t1 values ('a1', 1, 1), +('b2', 1, 1), +('A3', 1, 1), +('B4', 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 key1 key2 +a1 1 1 +A3 1 1 +b2 1 1 +B4 1 1 +drop table t1; +create table t1 ( +pk1 int not NULL, +pk2 char(4) not NULL collate latin1_german1_ci, +pk3 char(4) not NULL collate latin1_bin, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1,pk2,pk3), +KEY key1 (key1), +KEY key2 (key2) +) engine=bdb; +insert into t1 values +(1, 'u', 'u', 1, 1), +(1, 'u', char(0xEC), 1, 1), +(1, 'u', 'x', 1, 1); +insert ignore into t1 select pk1, char(0xEC), pk3, key1, key2 from t1; +insert ignore into t1 select pk1, 'x', pk3, key1, key2 from t1 where pk2='u'; +insert ignore into t1 select 2, pk2, pk3, key1, key2 from t1; +select * from t1; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +alter table t1 drop primary key; +select * from t1; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +drop table t1; +create table t1 ( +pk1 varchar(8) NOT NULL default '', +pk2 varchar(4) NOT NULL default '', +key1 int(11), +key2 int(11), +primary key(pk1, pk2), +KEY key1 (key1), +KEY key2 (key2) +) engine=bdb; +insert into t1 values ('','empt',2,2), +('a','a--a',2,2), +('bb','b--b',2,2), +('ccc','c--c',2,2), +('dddd','d--d',2,2); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 pk2 key1 key2 + empt 2 2 +a a--a 2 2 +bb b--b 2 2 +ccc c--c 2 2 +dddd d--d 2 2 +drop table t1; diff --git a/mysql-test/r/rowid_order_innodb.result b/mysql-test/r/rowid_order_innodb.result new file mode 100644 index 00000000000..b1de1b37e73 --- /dev/null +++ b/mysql-test/r/rowid_order_innodb.result @@ -0,0 +1,186 @@ +drop table if exists t1, t2, t3,t4; +create table t1 ( +pk1 int not NULL, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1), +KEY key1 (key1), +KEY key2 (key2) +) engine=innodb; +insert into t1 values (-5, 1, 1), +(-100, 1, 1), +(3, 1, 1), +(0, 1, 1), +(10, 1, 1); +explain select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 4 Using where +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 key1 key2 +-100 1 1 +-5 1 1 +0 1 1 +3 1 1 +10 1 1 +drop table t1; +create table t1 ( +pk1 int unsigned not NULL, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1), +KEY key1 (key1), +KEY key2 (key2) +) engine=innodb; +insert into t1 values (0, 1, 1), +(0xFFFFFFFF, 1, 1), +(0xFFFFFFFE, 1, 1), +(1, 1, 1), +(2, 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 key1 key2 +0 1 1 +1 1 1 +2 1 1 +4294967294 1 1 +4294967295 1 1 +drop table t1; +create table t1 ( +pk1 char(4) not NULL, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1), +KEY key1 (key1), +KEY key2 (key2) +) engine=innodb collate latin2_general_ci; +insert into t1 values ('a1', 1, 1), +('b2', 1, 1), +('A3', 1, 1), +('B4', 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 key1 key2 +a1 1 1 +A3 1 1 +b2 1 1 +B4 1 1 +drop table t1; +create table t1 ( +pk1 int not NULL, +pk2 char(4) not NULL collate latin1_german1_ci, +pk3 char(4) not NULL collate latin1_bin, +key1 int(11), +key2 int(11), +PRIMARY KEY (pk1,pk2,pk3), +KEY key1 (key1), +KEY key2 (key2) +) engine=innodb; +insert into t1 values +(1, 'u', 'u', 1, 1), +(1, 'u', char(0xEC), 1, 1), +(1, 'u', 'x', 1, 1); +insert ignore into t1 select pk1, char(0xEC), pk3, key1, key2 from t1; +insert ignore into t1 select pk1, 'x', pk3, key1, key2 from t1 where pk2='u'; +insert ignore into t1 select 2, pk2, pk3, key1, key2 from t1; +select * from t1; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +alter table t1 drop primary key; +select * from t1; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 pk2 pk3 key1 key2 +1 ì u 1 1 +1 ì x 1 1 +1 ì ì 1 1 +1 u u 1 1 +1 u x 1 1 +1 u ì 1 1 +1 x u 1 1 +1 x x 1 1 +1 x ì 1 1 +2 ì u 1 1 +2 ì x 1 1 +2 ì ì 1 1 +2 u u 1 1 +2 u x 1 1 +2 u ì 1 1 +2 x u 1 1 +2 x x 1 1 +2 x ì 1 1 +drop table t1; +create table t1 ( +pk1 varchar(8) NOT NULL default '', +pk2 varchar(4) NOT NULL default '', +key1 int(11), +key2 int(11), +primary key(pk1, pk2), +KEY key1 (key1), +KEY key2 (key2) +) engine=innodb; +insert into t1 values ('','empt',2,2), +('a','a--a',2,2), +('bb','b--b',2,2), +('ccc','c--c',2,2), +('dddd','d--d',2,2); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +pk1 pk2 key1 key2 + empt 2 2 +a a--a 2 2 +bb b--b 2 2 +ccc c--c 2 2 +dddd d--d 2 2 +drop table t1; diff --git a/mysql-test/t/index_merge_ror.test b/mysql-test/t/index_merge_ror.test new file mode 100644 index 00000000000..95b6204f424 --- /dev/null +++ b/mysql-test/t/index_merge_ror.test @@ -0,0 +1,215 @@ +# +# ROR-index_merge tests. +# +--disable_warnings +drop table if exists t1,t0; +--enable_warnings +--disable_query_log +create table t1 +( + /* Field names reflect value(rowid) distribution, st=STairs, swt= SaWTooth */ + st_a int not null, + swt1a int not null, + swt2a int not null, + + st_b int not null, + swt1b int not null, + swt2b int not null, + + /* fields/keys for row retrieval tests */ + key1 int, + key2 int, + key3 int, + key4 int, + + /* make rows much bigger then keys */ + filler1 char (200), + filler2 char (200), + filler3 char (200), + filler4 char (200), + filler5 char (200), + filler6 char (200), + + /* order of keys is important */ + key sta_swt12a(st_a,swt1a,swt2a), + key sta_swt1a(st_a,swt1a), + key sta_swt2a(st_a,swt2a), + key sta_swt21a(st_a,swt2a,swt1a), + + key st_a(st_a), + key stb_swt1a_2b(st_b,swt1b,swt2a), + key stb_swt1b(st_b,swt1b), + key st_b(st_b), + + key(key1), + key(key2), + key(key3), + key(key4) +) ; + +# Fill table +create table t0 as select * from t1; +let $cnt=1000; +while ($cnt) +{ + eval insert into t0 values (1, 2, 3, 1, 2, 3, 0, 0, 0, 0, 'data1', 'data2', 'data3', 'data4', 'data5', 'data6'); + dec $cnt; +} + +alter table t1 disable keys; +let $1=4; +while ($1) +{ + let $2=4; + while ($2) + { + let $3=4; + while ($3) + { + eval insert into t1 select $1, $2, $3, $1 ,$2, $3, key1, key2, key3, key4, filler1, filler2, filler3, filler4, filler5, filler6 from t0; + dec $3; + } + dec $2; + } + dec $1; +} + +# Row retrieval tests +# -1 is used for values 'out of any range we are using' +# insert enough rows for index intersection to be used for (key1,key2) +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 100, 100,'key1-key2-key3-key4'); +let $cnt=400; +while ($cnt) +{ + eval insert into t1 (key1, key2, key3, key4, filler1) values (100, -1, 100, -1,'key1-key3'); + dec $cnt; +} +let $cnt=400; +while ($cnt) +{ + eval insert into t1 (key1, key2, key3, key4, filler1) values (-1, 100, -1, 100,'key2-key4'); + dec $cnt; +} +alter table t1 enable keys; +--enable_query_log +select count(*) from t1; + +# One row results tests for cases where a single row matches all conditions +explain select key1,key2 from t1 where key1=100 and key2=100; +select key1,key2 from t1 where key1=100 and key2=100; + +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; + +# Several-rows results +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, -1, -1, 'key1-key2'); +insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 100, 100, 'key4-key3'); + +# ROR-intersection, not covering +explain select key1,key2,filler1 from t1 where key1=100 and key2=100; +select key1,key2,filler1 from t1 where key1=100 and key2=100; + +# ROR-intersection, covering +explain select key1,key2 from t1 where key1=100 and key2=100; +select key1,key2 from t1 where key1=100 and key2=100; + +# ROR-union of ROR-intersections +explain select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +select key1,key2,key3,key4 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; + +# 3-way ROR-intersection +explain select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100; +select key1,key2,key3 from t1 where key1=100 and key2=100 and key3=100; + +# ROR-union(ROR-intersection, ROR-range) +insert into t1 (key1,key2,key3,key4,filler1) values (101,101,101,101, 'key1234-101'); +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101; +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=101; + +# Run some ROR updates/deletes +select key1,key2, filler1 from t1 where key1=100 and key2=100; +update t1 set filler1='to be deleted' where key1=100 and key2=100; +update t1 set key1=200,key2=200 where key1=100 and key2=100; +delete from t1 where key1=200 and key2=200; +select key1,key2,filler1 from t1 where key2=100 and key2=200; + +# ROR-union(ROR-intersection) with one of ROR-intersection giving empty +# results +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; + +delete from t1 where key3=100 and key4=100; + +# ROR-union with all ROR-intersections giving empty results +explain select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; +select key1,key2,key3,key4,filler1 from t1 where key1=100 and key2=100 or key3=100 and key4=100; + +# ROR-intersection with empty result +explain select key1,key2 from t1 where key1=100 and key2=100; +select key1,key2 from t1 where key1=100 and key2=100; + +# ROR-union tests with various cases. +# All scans returning duplicate rows: +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-1'); +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-2'); +insert into t1 (key1, key2, key3, key4, filler1) values (100, 100, 200, 200,'key1-key2-key3-key4-3'); + +explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; + +insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, -1, 200,'key4'); + +explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; + +insert into t1 (key1, key2, key3, key4, filler1) values (-1, -1, 200, -1,'key3'); + +explain select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; +select key1,key2,key3,key4,filler1 from t1 where key3=200 or (key1=100 and key2=100) or key4=200; + +## +## Optimizer tests +## + +# Check that the shortest key is used for ROR-intersection, covering and non-covering. +explain select * from t1 where st_a=1 and st_b=1; +explain select st_a,st_b from t1 where st_a=1 and st_b=1; + +# Check if "ingore index" syntax works +explain select st_a from t1 ignore index (st_a) where st_a=1 and st_b=1; + +# Do many tests +# Check that keys that don't improve selectivity are skipped. +# + +explain select * from t1 where st_a=1 and swt1a=1 and swt2a=1; + +explain select * from t1 where st_b=1 and swt1b=1 and swt2b=1; + +explain select * from t1 where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; + +explain select * from t1 ignore index (sta_swt21a, stb_swt1a_2b) + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; + +explain select * from t1 ignore index (sta_swt21a, sta_swt12a, stb_swt1a_2b) + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; + +explain select * from t1 ignore index (sta_swt21a, sta_swt12a, stb_swt1a_2b, stb_swt1b) + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1; + +explain select * from t1 + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1; + +explain select * from t1 + where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1; + +explain select st_a from t1 + where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1; + +explain select st_a from t1 + where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1; + +drop table t0,t1; + diff --git a/mysql-test/t/index_merge_ror_cpk.test b/mysql-test/t/index_merge_ror_cpk.test new file mode 100644 index 00000000000..52e946c9be6 --- /dev/null +++ b/mysql-test/t/index_merge_ror_cpk.test @@ -0,0 +1,81 @@ +# +# Clustered PK ROR-index_merge tests +# +-- source include/have_innodb.inc + +--disable_warnings +drop table if exists t1; +--enable_warnings + +create table t1 +( + pk1 int not null, + pk2 int not null, + + key1 int not null, + key2 int not null, + + pktail1ok int not null, + pktail2ok int not null, + pktail3bad int not null, + pktail4bad int not null, + pktail5bad int not null, + + pk2copy int not null, + badkey int not null, + + filler1 char (200), + filler2 char (200), + key (key1), + key (key2), + + /* keys with tails from CPK members */ + key (pktail1ok, pk1), + key (pktail2ok, pk1, pk2), + key (pktail3bad, pk2, pk1), + key (pktail4bad, pk1, pk2copy), + key (pktail5bad, pk1, pk2, pk2copy), + + primary key (pk1, pk2) +) engine=innodb; + +--disable_query_log +set autocommit=0; +let $1=10000; +while ($1) +{ + eval insert into t1 values ($1 div 10,$1 mod 100, $1/100,$1/100, $1/100,$1/100,$1/100,$1/100,$1/100, $1 mod 100, $1/1000,'filler-data-$1','filler2'); + dec $1; +} +set autocommit=1; +--enable_query_log + +# Verify that range scan on CPK is ROR +# (use index_intersection because it is impossible to check that for index union) +explain select * from t1 where pk1 = 1 and pk2 < 80 and key1=0; +# CPK scan + 1 ROR range scan is a special case +select * from t1 where pk1 = 1 and pk2 < 80 and key1=0; + +# Verify that CPK fields are considered to be covered by index scans +explain select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1; +select pk1,pk2 from t1 where key1 = 10 and key2=10 and 2*pk1+1 < 2*96+1; + +# Verify that CPK is always used for index intersection scans +# (this is because it is used as a filter, not for retrieval) +explain select * from t1 where badkey=1 and key1=10; +explain select * from t1 where pk1 < 7500 and key1 = 10; + +# Verify that keys with 'tails' of PK members are ok. +explain select * from t1 where pktail1ok=1 and key1=10; +explain select * from t1 where pktail2ok=1 and key1=10; + +explain select * from t1 where pktail3bad=1 and key1=10; +explain select * from t1 where pktail4bad=1 and key1=10; +explain select * from t1 where pktail5bad=1 and key1=10; + +# Test for problem with innodb key values prefetch buffer: +explain select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10; +select pk1,pk2,key1,key2 from t1 where key1 = 10 and key2=10 limit 10; + +drop table t1; + diff --git a/mysql-test/t/rowid_order_bdb.test b/mysql-test/t/rowid_order_bdb.test new file mode 100644 index 00000000000..ef133054c35 --- /dev/null +++ b/mysql-test/t/rowid_order_bdb.test @@ -0,0 +1,108 @@ +# +# Test for rowid ordering (and comparison) functions. +# do index_merge select for tables with PK of various types. +# +--disable_warnings +drop table if exists t1, t2, t3,t4; +--enable_warnings + +-- source include/have_bdb.inc + +# Signed number as rowid +create table t1 ( + pk1 int not NULL, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1), + KEY key1 (key1), + KEY key2 (key2) +) engine=bdb; +insert into t1 values (-5, 1, 1), + (-100, 1, 1), + (3, 1, 1), + (0, 1, 1), + (10, 1, 1); +explain select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Unsigned numbers as rowids +create table t1 ( + pk1 int unsigned not NULL, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1), + KEY key1 (key1), + KEY key2 (key2) +) engine=bdb; +insert into t1 values (0, 1, 1), + (0xFFFFFFFF, 1, 1), + (0xFFFFFFFE, 1, 1), + (1, 1, 1), + (2, 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Case-insensitive char(N) +create table t1 ( + pk1 char(4) not NULL, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1), + KEY key1 (key1), + KEY key2 (key2) +) engine=bdb collate latin2_general_ci; +insert into t1 values ('a1', 1, 1), + ('b2', 1, 1), + ('A3', 1, 1), + ('B4', 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Multi-part PK +create table t1 ( + pk1 int not NULL, + pk2 char(4) not NULL collate latin1_german1_ci, + pk3 char(4) not NULL collate latin1_bin, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1,pk2,pk3), + KEY key1 (key1), + KEY key2 (key2) +) engine=bdb; +insert into t1 values + (1, 'u', 'u', 1, 1), + (1, 'u', char(0xEC), 1, 1), + (1, 'u', 'x', 1, 1); +insert ignore into t1 select pk1, char(0xEC), pk3, key1, key2 from t1; +insert ignore into t1 select pk1, 'x', pk3, key1, key2 from t1 where pk2='u'; +insert ignore into t1 select 2, pk2, pk3, key1, key2 from t1; +select * from t1; +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; + +# Hidden PK +alter table t1 drop primary key; +select * from t1; +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Variable-length PK +# this is also test for Bug#2688 +create table t1 ( + pk1 varchar(8) NOT NULL default '', + pk2 varchar(4) NOT NULL default '', + key1 int(11), + key2 int(11), + primary key(pk1, pk2), + KEY key1 (key1), + KEY key2 (key2) +) engine=bdb; +insert into t1 values ('','empt',2,2), + ('a','a--a',2,2), + ('bb','b--b',2,2), + ('ccc','c--c',2,2), + ('dddd','d--d',2,2); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; + +drop table t1; + diff --git a/mysql-test/t/rowid_order_innodb.test b/mysql-test/t/rowid_order_innodb.test new file mode 100644 index 00000000000..fb4959d78e6 --- /dev/null +++ b/mysql-test/t/rowid_order_innodb.test @@ -0,0 +1,108 @@ +# +# Test for rowid ordering (and comparison) functions. +# do index_merge select for tables with PK of various types. +# +--disable_warnings +drop table if exists t1, t2, t3,t4; +--enable_warnings + +-- source include/have_innodb.inc + +# Signed number as rowid +create table t1 ( + pk1 int not NULL, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1), + KEY key1 (key1), + KEY key2 (key2) +) engine=innodb; +insert into t1 values (-5, 1, 1), + (-100, 1, 1), + (3, 1, 1), + (0, 1, 1), + (10, 1, 1); +explain select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Unsigned numbers as rowids +create table t1 ( + pk1 int unsigned not NULL, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1), + KEY key1 (key1), + KEY key2 (key2) +) engine=innodb; +insert into t1 values (0, 1, 1), + (0xFFFFFFFF, 1, 1), + (0xFFFFFFFE, 1, 1), + (1, 1, 1), + (2, 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Case-insensitive char(N) +create table t1 ( + pk1 char(4) not NULL, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1), + KEY key1 (key1), + KEY key2 (key2) +) engine=innodb collate latin2_general_ci; +insert into t1 values ('a1', 1, 1), + ('b2', 1, 1), + ('A3', 1, 1), + ('B4', 1, 1); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Multi-part PK +create table t1 ( + pk1 int not NULL, + pk2 char(4) not NULL collate latin1_german1_ci, + pk3 char(4) not NULL collate latin1_bin, + key1 int(11), + key2 int(11), + PRIMARY KEY (pk1,pk2,pk3), + KEY key1 (key1), + KEY key2 (key2) +) engine=innodb; +insert into t1 values + (1, 'u', 'u', 1, 1), + (1, 'u', char(0xEC), 1, 1), + (1, 'u', 'x', 1, 1); +insert ignore into t1 select pk1, char(0xEC), pk3, key1, key2 from t1; +insert ignore into t1 select pk1, 'x', pk3, key1, key2 from t1 where pk2='u'; +insert ignore into t1 select 2, pk2, pk3, key1, key2 from t1; +select * from t1; +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; + +# Hidden PK +alter table t1 drop primary key; +select * from t1; +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; +drop table t1; + +# Variable-length PK +# this is also test for Bug#2688 +create table t1 ( + pk1 varchar(8) NOT NULL default '', + pk2 varchar(4) NOT NULL default '', + key1 int(11), + key2 int(11), + primary key(pk1, pk2), + KEY key1 (key1), + KEY key2 (key2) +) engine=innodb; +insert into t1 values ('','empt',2,2), + ('a','a--a',2,2), + ('bb','b--b',2,2), + ('ccc','c--c',2,2), + ('dddd','d--d',2,2); +select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; + +drop table t1; + diff --git a/mysys/my_bit.c b/mysys/my_bit.c index 55dd72f5f76..01c9b5ea68d 100644 --- a/mysys/my_bit.c +++ b/mysys/my_bit.c @@ -71,3 +71,8 @@ uint my_count_bits(ulonglong v) #endif } +uint my_count_bits_ushort(ushort v) +{ + return nbits[v]; +} + diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index da16457c299..3c4caa413a9 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -330,3 +330,59 @@ void bitmap_union(MY_BITMAP *map, const MY_BITMAP *map2) bitmap_unlock(map); } + +/* + Get number of set bits in the bitmap +*/ +uint bitmap_bits_set(const MY_BITMAP *map) +{ + uchar *m= map->bitmap, + *end= map->bitmap+map->bitmap_size; + uint res= 0; + + DBUG_ASSERT(map->bitmap); + + bitmap_lock((MY_BITMAP *)map); + while (m < end) + { + res+= my_count_bits_ushort(*m++); + } + + bitmap_unlock((MY_BITMAP *)map); + return res; +} + + +/* + Return number of first zero bit or MY_BIT_NONE if all bits are set. +*/ + +uint bitmap_get_first(const MY_BITMAP *map) +{ + uchar *bitmap=map->bitmap; + uint bit_found = MY_BIT_NONE; + uint bitmap_size=map->bitmap_size*8; + uint i; + + DBUG_ASSERT(map->bitmap); + bitmap_lock((MY_BITMAP *)map); + for (i=0; i < bitmap_size ; i++, bitmap++) + { + if (*bitmap != 0xff) + { /* Found slot with free bit */ + uint b; + for (b=0; ; b++) + { + if (!(*bitmap & (1 << b))) + { + bit_found = (i*8)+b; + break; + } + } + break; /* Found bit */ + } + } + bitmap_unlock((MY_BITMAP *)map); + return bit_found; +} + diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index c4735403267..3daeae160cd 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -2457,4 +2457,29 @@ ha_rows ha_berkeley::estimate_number_of_rows() return share->rows + HA_BERKELEY_EXTRA_ROWS; } +int ha_berkeley::cmp_ref(const byte *ref1, const byte *ref2) +{ + if (hidden_primary_key) + return memcmp(ref1, ref2, BDB_HIDDEN_PRIMARY_KEY_LENGTH); + + int result; + Field *field; + KEY *key_info=table->key_info+table->primary_key; + KEY_PART_INFO *key_part=key_info->key_part; + KEY_PART_INFO *end=key_part+key_info->key_parts; + + for (; key_part != end; key_part++) + { + field= key_part->field; + result= field->pack_cmp((const char*)ref1, (const char*)ref2, + key_part->length); + if (result) + return result; + ref1 += field->packed_col_length((const char*)ref1, key_part->length); + ref2 += field->packed_col_length((const char*)ref2, key_part->length); + } + + return 0; +} + #endif /* HAVE_BERKELEY_DB */ diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index f225c24eaf7..f5f6194ee77 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -168,6 +168,7 @@ class ha_berkeley: public handler void print_error(int error, myf errflag); uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; } bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern bool berkeley_skip, berkeley_shared_data; diff --git a/sql/ha_heap.h b/sql/ha_heap.h index c369c7029b4..80cd143eec3 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -93,5 +93,10 @@ class ha_heap: public handler THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); - + int cmp_ref(const byte *ref1, const byte *ref2) + { + HEAP_PTR ptr1=*(HEAP_PTR*)ref1; + HEAP_PTR ptr2=*(HEAP_PTR*)ref2; + return ptr1 < ptr2? -1 : (ptr1 > ptr2? 1 : 0); + } }; diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 8bd43ea92cd..da46dc799e9 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -711,6 +711,8 @@ ha_innobase::init_table_handle_for_HANDLER(void) prebuilt->read_just_key = FALSE; prebuilt->used_in_HANDLER = TRUE; + + prebuilt->keep_other_fields_on_keyread = FALSE; } /************************************************************************* @@ -4454,9 +4456,11 @@ ha_innobase::extra( if (prebuilt->blob_heap) { row_mysql_prebuilt_free_blob_heap(prebuilt); } + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_RESET_STATE: + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; break; case HA_EXTRA_NO_KEYREAD: @@ -4468,6 +4472,9 @@ ha_innobase::extra( case HA_EXTRA_KEYREAD: prebuilt->read_just_key = 1; break; + case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: + prebuilt->keep_other_fields_on_keyread = 1; + break; default:/* Do nothing */ ; } @@ -4526,6 +4533,7 @@ ha_innobase::start_stmt( prebuilt->sql_stat_start = TRUE; prebuilt->hint_no_need_to_fetch_extra_cols = TRUE; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (!prebuilt->mysql_has_locked) { /* This handle is for a temporary table created inside @@ -4604,6 +4612,7 @@ ha_innobase::external_lock( prebuilt->hint_no_need_to_fetch_extra_cols = TRUE; prebuilt->read_just_key = 0; + prebuilt->keep_other_fields_on_keyread = FALSE; if (lock_type == F_WRLCK) { @@ -5000,4 +5009,52 @@ ha_innobase::get_auto_increment() return(nr); } +int +ha_innobase::cmp_ref( + const mysql_byte *ref1, + const mysql_byte *ref2) +{ + row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; + enum_field_types mysql_type; + Field* field; + int result; + + if (prebuilt->clust_index_was_generated) + return memcmp(ref1, ref2, DATA_ROW_ID_LEN); + + /* Do type-aware comparison of Primary Key members. PK members + are always NOT NULL, so no checks for NULL are performed */ + KEY_PART_INFO *key_part= table->key_info[table->primary_key].key_part; + KEY_PART_INFO *key_part_end= + key_part + table->key_info[table->primary_key].key_parts; + for (; key_part != key_part_end; ++key_part) { + field = key_part->field; + mysql_type = field->type(); + if (mysql_type == FIELD_TYPE_TINY_BLOB + || mysql_type == FIELD_TYPE_MEDIUM_BLOB + || mysql_type == FIELD_TYPE_BLOB + || mysql_type == FIELD_TYPE_LONG_BLOB) { + + ut_a(!ref1[1]); + ut_a(!ref2[1]); + byte len1= *ref1; + byte len2= *ref2; + ref1 += 2; + ref2 += 2; + result = + ((Field_blob*)field)->cmp((const char*)ref1, len1, + (const char*)ref2, len2); + } else { + result = + field->cmp((const char*)ref1, (const char*)ref2); + } + + if (result) + return result; + ref1 += key_part->length; + ref2 += key_part->length; + } + return 0; +} + #endif /* HAVE_INNOBASE_DB */ diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index c305a019fcd..ff224370df1 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -188,6 +188,7 @@ class ha_innobase: public handler longlong get_auto_increment(); uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; } bool primary_key_is_clustered() { return true; } + int cmp_ref(const byte *ref1, const byte *ref2); }; extern bool innodb_skip; diff --git a/sql/handler.h b/sql/handler.h index 4a8abf5f131..386b9fa4d7d 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -382,6 +382,11 @@ public: false otherwise */ virtual bool primary_key_is_clustered() { return false; } + + virtual int cmp_ref(const byte *ref1, const byte *ref2) + { + return memcmp(ref1, ref2, ref_length); + } }; /* Some extern variables used with handlers */ diff --git a/sql/opt_range.cc b/sql/opt_range.cc index fa1b80f007e..ad48f3050c0 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -23,6 +23,19 @@ */ +/* + Classes in this file are used in the following way: + 1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects + is created. #of rows in table and index statistics are ignored at this + step. + 2. Created SEL_TREE and index stats data are used to construct a + TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read + plans may be created. + 3. The least expensive table read plan is used to create a tree of + QUICK_SELECT_I-derived objects which are later used for row retrieval. + QUICK_RANGEs are also created in this step. +*/ + #ifdef __GNUC__ #pragma implementation // gcc: Class implementation #endif @@ -167,6 +180,22 @@ public: min_value=arg->max_value; min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN; } + void store_min(uint length,char **min_key,uint min_key_flag) + { + if ((min_flag & GEOM_FLAG) || + (!(min_flag & NO_MIN_RANGE) && + !(min_key_flag & (NO_MIN_RANGE | NEAR_MIN)))) + { + if (maybe_null && *min_value) + { + **min_key=1; + bzero(*min_key+1,length); + } + else + memcpy(*min_key,min_value,length+(int) maybe_null); + (*min_key)+= length+(int) maybe_null; + } + } void store(uint length,char **min_key,uint min_key_flag, char **max_key, uint max_key_flag) { @@ -280,8 +309,21 @@ public: bzero((char*) keys,sizeof(keys)); } SEL_ARG *keys[MAX_KEY]; - key_map keys_map; /* bitmask of non-NULL elements in keys */ - List<SEL_IMERGE> merges; /* possible ways to read rows using index_merge */ + key_map keys_map; /* bitmask of non-NULL elements in keys */ + + /* + Possible ways to read rows using index_merge. The list is non-empty only + if type==KEY. Currently can be non empty only if keys_map.is_clear_all(). + */ + List<SEL_IMERGE> merges; + + /* The members below are filled/used only after get_mm_tree is done */ + key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */ + uint n_ror_scans; + + struct st_ror_scan_info **ror_scans; /* list of ROR key scans */ + struct st_ror_scan_info **ror_scans_end; /* last ROR scan */ + /* Note that #records for each key scan is stored in table->quick_rows */ }; @@ -291,22 +333,44 @@ typedef struct st_qsel_param { KEY_PART *key_parts,*key_parts_end,*key[MAX_KEY]; MEM_ROOT *mem_root; table_map prev_tables,read_tables,current_table; - uint baseflag, keys, max_key_part, range_count; - uint real_keynr[MAX_KEY]; + uint baseflag, max_key_part, range_count; + + uint keys; /* number of keys used in the query */ + + /* used_key_no -> table_key_no translation table */ + uint real_keynr[MAX_KEY]; + char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH], max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; bool quick; // Don't calulate possible keys + uint fields_bitmap_size; + MY_BITMAP needed_fields; /* bitmask of fields needed by the query */ + + key_map *needed_reg; /* ptr to SQL_SELECT::needed_reg */ + uint *imerge_cost_buff; /* buffer for index_merge cost estimates */ uint imerge_cost_buff_size; /* size of the buffer */ + + bool is_ror_scan; /* true if last checked tree->key can be used for ROR-scan */ } PARAM; +class TABLE_READ_PLAN; + class TRP_RANGE; + class TRP_ROR_INTERSECT; + class TRP_ROR_UNION; + class TRP_ROR_INDEX_MERGE; + +struct st_ror_scan_info; + static SEL_TREE * get_mm_parts(PARAM *param,Field *field, Item_func::Functype type,Item *value, Item_result cmp_type); static SEL_ARG *get_mm_leaf(PARAM *param,Field *field,KEY_PART *key_part, Item_func::Functype type,Item *value); static SEL_TREE *get_mm_tree(PARAM *param,COND *cond); + +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree); static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, char *min_key,uint min_key_flag, @@ -315,25 +379,37 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree, QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index, SEL_ARG *key_tree, MEM_ROOT *alloc = NULL); -static int get_quick_select_params(SEL_TREE *tree, PARAM *param, - key_map& needed_reg, - bool index_read_can_be_used, - double *read_time, - ha_rows *records, - SEL_ARG*** key_to_read); +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time); +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + bool force_index_only, + double read_time, + bool *are_all_covering); +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time); +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time); static int get_index_merge_params(PARAM *param, key_map& needed_reg, SEL_IMERGE *imerge, double *read_time, ha_rows* imerge_rows); -inline double get_index_only_read_time(PARAM* param, ha_rows records, +inline double get_index_only_read_time(const PARAM* param, ha_rows records, int keynr); #ifndef DBUG_OFF -static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, - const key_map *needed_reg); -void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map *needed_reg); - +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg); +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end); +static void print_rowid(byte* val, int len); +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg); #endif + static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2); static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2); @@ -629,7 +705,7 @@ QUICK_SELECT_I::QUICK_SELECT_I() QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, bool no_alloc, MEM_ROOT *parent_alloc) - :dont_free(0),error(0),cur_range(NULL),range(0) + :dont_free(0),error(0),free_file(0),cur_range(NULL),range(0) { index= key_nr; head= table; @@ -654,12 +730,22 @@ int QUICK_RANGE_SELECT::init() QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT() { + DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT"); if (!dont_free) { file->index_end(); - delete_dynamic(&ranges); /* ranges are allocated in alloc */ - free_root(&alloc,MYF(0)); - } + file->extra(HA_EXTRA_NO_KEYREAD); + delete_dynamic(&ranges); /* ranges are allocated in alloc */ + if (free_file) + { + DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file, + free_file)); + file->reset(); + file->close(); + } + free_root(&alloc,MYF(0)); + } + DBUG_VOID_RETURN; } @@ -717,6 +803,257 @@ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT() free_root(&alloc,MYF(0)); } + +QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, + TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + : cpk_quick(NULL), thd(thd_param), reset_called(false), + need_to_fetch_row(retrieve_full_rows) +{ + index= MAX_KEY; + head= table; + record= head->record[0]; + if (!parent_alloc) + init_sql_alloc(&alloc,1024,0); + else + bzero(&alloc, sizeof(MEM_ROOT)); + last_rowid= (byte*)alloc_root(parent_alloc? parent_alloc : &alloc, + head->file->ref_length); +} + +int QUICK_ROR_INTERSECT_SELECT::init() +{ + /* Check if last_rowid was allocated in ctor */ + return !last_rowid; +} + + +/* + Init this quick select to be a ROR child scan. + NOTE + QUICK_ROR_INTERSECT_SELECT::reset() may choose not to call this function + but reuse its handler object for doing one of range scans. It duplicates + a part of this function' code. + RETURN + 0 ROR child scan initialized, ok to use. + 1 error +*/ + +int QUICK_RANGE_SELECT::init_ror_child_scan(bool reuse_handler) +{ + handler *save_file= file; + DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_child_scan"); + + if (reuse_handler) + { + DBUG_PRINT("info", ("Reusing handler %p", file)); + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) | + init() || reset()) + { + DBUG_RETURN(1); + } + else + DBUG_RETURN(0); + } + + /* Create a separate handler object for this quick select */ + if (free_file) + { + /* already have own 'handler' object. */ + DBUG_RETURN(0); + } + + if (!(file= get_new_handler(head, head->db_type))) + goto failure; + DBUG_PRINT("info", ("Allocated new handler %p", file)); + if (file->ha_open(head->path, head->db_stat, HA_OPEN_IGNORE_IF_LOCKED)) + { + /* Caller will free the memory */ + goto failure; + } + + if (file->extra(HA_EXTRA_KEYREAD) || + file->extra(HA_EXTRA_RETRIEVE_ALL_COLS) || + init() || reset()) + { + file->close(); + goto failure; + } + free_file= true; + last_rowid= file->ref; + DBUG_RETURN(0); + +failure: + file= save_file; + DBUG_RETURN(1); +} + +int QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan(bool reuse_handler) +{ + List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_child_scan"); + + /* Initialize all merged "children" quick selects */ + quick_it.rewind(); + DBUG_ASSERT(!(need_to_fetch_row && !reuse_handler)); + if (!need_to_fetch_row && reuse_handler) + { + quick= quick_it++; + /* + There is no use for this->file. Reuse it for first of merged range + selects. + */ + if (quick->init_ror_child_scan(true)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + } + while((quick= quick_it++)) + { + if (quick->init_ror_child_scan(false)) + DBUG_RETURN(1); + quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); + /* Share the same record structure in intersect*/ + quick->record= head->record[0]; + } + + if (need_to_fetch_row && head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +int QUICK_ROR_INTERSECT_SELECT::reset() +{ + int result; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset"); + result= init_ror_child_scan(true); + DBUG_RETURN(result); +} + +bool +QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick) +{ + bool res; + if (head->file->primary_key_is_clustered() && + quick->index == head->primary_key) + { + cpk_quick= quick; + res= 0; + } + else + res= quick_selects.push_back(quick); + return res; +} + +QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() +{ + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT"); + quick_selects.delete_elements(); + delete cpk_quick; + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + +QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, + TABLE *table) + : thd(thd_param), reset_called(false) +{ + index= MAX_KEY; + head= table; + rowid_length= table->file->ref_length; + record= head->record[0]; + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + my_pthread_setspecific_ptr(THR_MALLOC,&alloc); +} + +int QUICK_ROR_UNION_SELECT::init() +{ + if (init_queue(&queue, quick_selects.elements, 0, + false , QUICK_ROR_UNION_SELECT::queue_cmp, + (void*) this)) + { + bzero(&queue, sizeof(QUEUE)); + return 1; + } + + if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length))) + return 1; + prev_rowid= cur_rowid + head->file->ref_length; + return 0; +} + +/* + Comparison function to be used by priority queue. + SYNPOSIS + QUICK_ROR_UNION_SELECT::queue_cmp() + arg Pointer to QUICK_ROR_UNION_SELECT + val1 First merged select + val2 Second merged select +*/ +int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2) +{ + QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; + return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, + ((QUICK_SELECT_I*)val2)->last_rowid); +} + +int QUICK_ROR_UNION_SELECT::reset() +{ + QUICK_SELECT_I* quick; + int error; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset"); + have_prev_rowid= false; + /* + Initialize scans for merged quick selects and put all merged quick + selects into the queue. + */ + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (quick->init_ror_child_scan(false)) + DBUG_RETURN(1); + if ((error= quick->get_next())) + { + if (error == HA_ERR_END_OF_FILE) + continue; + else + DBUG_RETURN(error); + } + quick->save_last_pos(); + queue_insert(&queue, (byte*)quick); + } + + if (head->file->rnd_init()) + { + DBUG_PRINT("error", ("ROR index_merge rnd_init call failed")); + DBUG_RETURN(1); + } + + DBUG_RETURN(0); +} + + +bool +QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) +{ + return quick_selects.push_back(quick_sel_range); +} + +QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT() +{ + DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT"); + delete_queue(&queue); + quick_selects.delete_elements(); + free_root(&alloc,MYF(0)); + DBUG_VOID_RETURN; +} + + QUICK_RANGE::QUICK_RANGE() :min_key(0),max_key(0),min_length(0),max_length(0), flag(NO_MIN_RANGE | NO_MAX_RANGE) @@ -879,6 +1216,136 @@ SEL_ARG *SEL_ARG::clone_tree() return root; } + +/* + Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived + objects from table read plans. +*/ +class TABLE_READ_PLAN +{ +public: + /* + Plan read cost, with or without cost of full row retrieval, depending + on plan creation parameters. + */ + double read_cost; + ha_rows records; /* estimate of #rows to be examined */ + + bool is_ror; + /* Create quck select for this plan */ + virtual QUICK_SELECT_I *make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc=NULL) = 0; + + /* Table read plans are allocated on MEM_ROOT and must not be deleted */ + static void *operator new(size_t size, MEM_ROOT *mem_root) + { return (void*) alloc_root(mem_root, (uint) size); } + static void operator delete(void *ptr,size_t size) {} +}; + +class TRP_ROR_INTERSECT; +class TRP_ROR_UNION; +class TRP_INDEX_MERGE; + + +class TRP_RANGE : public TABLE_READ_PLAN +{ +public: + SEL_ARG *key; + uint key_idx; /* key number in param->keys */ + TRP_RANGE(SEL_ARG *key_arg, uint idx_arg) + : key(key_arg), key_idx(idx_arg) + {} + + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc) + { + DBUG_ENTER("TRP_RANGE::make_quick"); + QUICK_RANGE_SELECT *quick; + /* ignore retrieve_full_rows there as it is set/used elsewhere */ + if ((quick= get_quick_select(param, key_idx, key, parent_alloc))) + { + quick->records= records; + quick->read_time= read_cost; + } + DBUG_RETURN(quick); + } +}; + +class TRP_ROR_INTERSECT : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + struct st_ror_scan_info **first_scan; + struct st_ror_scan_info **last_scan; + bool is_covering; /* true if no row retrieval phase is necessary */ + double index_scan_costs; +}; + +/* + ROR-union is currently never covering. +*/ +class TRP_ROR_UNION : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TABLE_READ_PLAN **first_ror; + TABLE_READ_PLAN **last_ror; + +}; + +class TRP_INDEX_MERGE : public TABLE_READ_PLAN +{ +public: + QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + TRP_RANGE **range_scans; + TRP_RANGE **range_scans_end; +}; + + +/* + Fill param->needed_fields with bitmap of fields used in the query + NOTE + Do not put clustered PK members into it as they are implicitly present in + all keys. +*/ + +static int fill_used_fields_bitmap(PARAM *param) +{ + TABLE *table= param->table; + param->fields_bitmap_size= (table->fields/8 + 1); + uchar *tmp; + uint pk; + if (!(tmp= (uchar*)alloc_root(param->mem_root,param->fields_bitmap_size)) || + bitmap_init(¶m->needed_fields, tmp, param->fields_bitmap_size*8, + false)) + return 1; + + bitmap_clear_all(¶m->needed_fields); + for (uint i= 0; i < table->fields; i++) + { + if (param->thd->query_id == table->field[i]->query_id) + bitmap_set_bit(¶m->needed_fields, i+1); + } + + pk= param->table->primary_key; + if (param->table->file->primary_key_is_clustered() && pk != MAX_KEY) + { + KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[pk].key_parts; + for(;key_part != key_part_end; ++key_part) + { + bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr); + } + } + return 0; +} + + /* Test if a key can be used in different ranges @@ -887,11 +1354,12 @@ SEL_ARG *SEL_ARG::clone_tree() limit, force_quick_range) Updates the following in the select parameter: - needed_reg - Bits for keys with may be used if all prev regs are read - quick - Parameter to use when reading records. + needed_reg - Bits for keys with may be used if all prev regs are read + quick - Parameter to use when reading records. + In the table struct the following information is updated: - quick_keys - Which keys can be used - quick_rows - How many rows the key matches + quick_keys - Which keys can be used + quick_rows - How many rows the key matches RETURN VALUES -1 if impossible select @@ -910,7 +1378,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, uint basflag; uint idx; double scan_time; - QUICK_INDEX_MERGE_SELECT *quick_imerge= NULL; DBUG_ENTER("test_quick_select"); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", keys_to_use.to_ulonglong(), (ulong) prev_tables, @@ -956,13 +1423,17 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.table=head; param.keys=0; param.mem_root= &alloc; + param.needed_reg= &needed_reg; + param.imerge_cost_buff_size= 0; + thd->no_errors=1; // Don't warn about NULL init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); if (!(param.key_parts = (KEY_PART*) alloc_root(&alloc, sizeof(KEY_PART)* - head->key_parts))) + head->key_parts)) + || fill_used_fields_bitmap(¶m)) { thd->no_errors=0; free_root(&alloc,MYF(0)); // Return memory & allocator @@ -971,7 +1442,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, key_parts= param.key_parts; old_root=my_pthread_getspecific_ptr(MEM_ROOT*,THR_MALLOC); my_pthread_setspecific_ptr(THR_MALLOC,&alloc); - + + /* + Make an array with description of all key parts of all table keys. + This is used in get_mm_parts function. + */ for (idx=0 ; idx < head->keys ; idx++) { if (!keys_to_use.is_set(idx)) @@ -1001,55 +1476,74 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { if (tree->type == SEL_TREE::IMPOSSIBLE) { - records=0L; // Return -1 from this function + records=0L; // Return -1 from this function read_time= (double) HA_POS_ERROR; } else if (tree->type == SEL_TREE::KEY || tree->type == SEL_TREE::KEY_SMALLER) { - /* + TABLE_READ_PLAN *best_trp; + /* It is possible to use a quick select (but maybe it would be slower than 'all' table scan). + Btw, tree type SEL_TREE::INDEX_MERGE was not introduced + intentionally. */ - SEL_ARG **best_key= 0; - ha_rows found_records; - double found_read_time= read_time; - - if (!get_quick_select_params(tree, ¶m, needed_reg, false, - &found_read_time, &found_records, - &best_key)) + if (tree->merges.is_empty()) { + double best_read_time= read_time; + TRP_ROR_INTERSECT *new_trp; + bool can_build_covering= false; + + /* Get best 'range' plan and prepare data for making other plans */ + if ((best_trp= get_key_scans_params(¶m, tree, false, + best_read_time))) + best_read_time= best_trp->read_cost; + /* - Ok, quick select is better than 'all' table scan and we have its - parameters, so construct it. + Simultaneous key scans and row deletes on several handler + objects are not allowed so don't use ROR-intersection for + table deletes. */ - read_time= found_read_time; - records= found_records; - - if ((quick= get_quick_select(¶m,(uint) (best_key-tree->keys), - *best_key)) && (!quick->init())) + if (thd->lex->sql_command != SQLCOM_DELETE) { - quick->records= records; - quick->read_time= read_time; + /* + Get best non-covering ROR-intersection plan and prepare data for + building covering ROR-intersection + */ + if ((new_trp= get_best_ror_intersect(¶m, tree, false, + best_read_time, + &can_build_covering))) + { + best_trp= new_trp; + best_read_time= best_trp->read_cost; + } + + /* + Try constructing covering ROR-intersect only if it looks possible + and worth doing. + */ + if (new_trp && !new_trp->is_covering && can_build_covering && + (new_trp= get_best_covering_ror_intersect(¶m, tree, + best_read_time))) + best_trp= new_trp; } } - - /* - Btw, tree type SEL_TREE::INDEX_MERGE was not introduced - intentionally. - */ - - /* If no range select could be built, try using index_merge. */ - if (!quick && !tree->merges.is_empty()) + else { + /* Try creating index_merge/ROR-union scan. */ + SEL_IMERGE *imerge; + TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp; + LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */ DBUG_PRINT("info",("No range reads possible," " trying to construct index_merge")); - SEL_IMERGE *imerge; - SEL_IMERGE *min_imerge= NULL; - double min_imerge_read_time; - ha_rows min_imerge_records; - LINT_INIT(min_imerge_records); // Protected by min_imerge + /* + Calculate cost of 'all'+index_only scan if it is possible. + It is possible that table can be read in two ways: + a) 'all' + index_only + b) index_merge without index_only. + */ if (!head->used_keys.is_clear_all()) { int key_for_use= find_shortest_key(head, &head->used_keys); @@ -1058,104 +1552,39 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, read_time = get_index_only_read_time(¶m, total_table_records, key_for_use); DBUG_PRINT("info", - ("'all' scan will be using key %d, read time %g", - key_for_use, read_time)); + ("'all'+'using index' scan will be using key %d, " + "read time %g", key_for_use, read_time)); } - - min_imerge_read_time=read_time; - /* - Ok, read_time contains best 'all' read time. - Now look for index_merge with cost < read_time - */ + List_iterator_fast<SEL_IMERGE> it(tree->merges); while ((imerge= it++)) { - if (!get_index_merge_params(¶m, needed_reg, imerge, - &min_imerge_read_time, - &min_imerge_records)) - min_imerge= imerge; - } - - if (!min_imerge) - goto end_free; - - records= min_imerge_records; - - /* Ok, using index_merge is the best option, so construct it. */ - if (!(quick= quick_imerge= new QUICK_INDEX_MERGE_SELECT(thd, head))) - goto end_free; - - quick->records= min_imerge_records; - quick->read_time= min_imerge_read_time; - - my_pthread_setspecific_ptr(THR_MALLOC, &quick_imerge->alloc); - - QUICK_RANGE_SELECT *new_quick; - for (SEL_TREE **ptree = min_imerge->trees; - ptree != min_imerge->trees_next; - ptree++) - { - SEL_ARG **tree_best_key= - min_imerge->best_keys[ptree - min_imerge->trees]; - if ((new_quick= get_quick_select(¶m, - (uint)(tree_best_key- - (*ptree)->keys), - *tree_best_key, - &quick_imerge->alloc))) - { - new_quick->records= min_imerge_records; - new_quick->read_time= min_imerge_read_time; - /* - QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC - pointing to its allocator, restore it back. - */ - quick_imerge->last_quick_select= new_quick; - - if (quick_imerge->push_quick_back(new_quick)) - { - delete new_quick; - delete quick; - quick= quick_imerge= NULL; - goto end_free; - } - } - else - { - delete quick; - quick= quick_imerge= NULL; - goto end_free; - } + new_conj_trp= get_best_disjunct_quick(¶m, imerge, read_time); + if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost < + best_conj_trp->read_cost)) + best_conj_trp= new_conj_trp; } + best_trp= best_conj_trp; + } - free_root(&alloc,MYF(0)); - my_pthread_setspecific_ptr(THR_MALLOC,old_root); - if (quick->init()) + my_pthread_setspecific_ptr(THR_MALLOC, old_root); + if (best_trp) + { + records= best_trp->records; + if (!(quick= best_trp->make_quick(¶m, true)) || quick->init()) { delete quick; - quick= quick_imerge= NULL; - DBUG_PRINT("error", - ("Failed to allocate index merge structures," - "falling back to full scan.")); + quick= NULL; } - goto end; - } + } } } -end_free: + my_pthread_setspecific_ptr(THR_MALLOC, old_root); free_root(&alloc,MYF(0)); // Return memory & allocator - my_pthread_setspecific_ptr(THR_MALLOC,old_root); -end: thd->no_errors=0; } - DBUG_EXECUTE("info", - { - if (quick_imerge) - print_quick_sel_imerge(quick_imerge, &needed_reg); - else - print_quick_sel_range((QUICK_RANGE_SELECT*)quick, &needed_reg); - } - ); + DBUG_EXECUTE("info", print_quick(quick, &needed_reg);); /* Assume that if the user is using 'limit' we will only need to scan @@ -1165,26 +1594,72 @@ end: } -/* - Calculate index merge cost and save parameters for its construction. +/* + Get cost of 'sweep' full row retrieveal of #records rows. + RETURN + 0 - ok + 1 - sweep is more expensive then full table scan. +*/ - SYNOPSIS - get_index_merge_params() - param in parameter with structure. - needed_reg in/out needed_reg from this SQL_SELECT - imerge in index_merge description structure - read_time in/out in: cost of an existing way to read a table - out: cost of index merge - imerge_rows out pessimistic estimate of # of rows to be retrieved +bool get_sweep_read_cost(const PARAM *param, ha_rows records, double* cost, + double index_reads_cost, double max_cost) +{ + if (param->table->file->primary_key_is_clustered()) + { + *cost= param->table->file->read_time(param->table->primary_key, + records, records); + } + else + { + double n_blocks= + ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE); + double busy_blocks= + n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); + if (busy_blocks < 1.0) + busy_blocks= 1.0; + DBUG_PRINT("info",("sweep: nblocks= %g, busy_blocks=%g index_blocks=%g", + n_blocks, busy_blocks, index_reads_cost)); + /* + Disabled: Bail out if # of blocks to read is bigger than # of blocks in + table data file. + if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks) + return 1; + */ + JOIN *join= param->thd->lex->select_lex.join; + if (!join || join->tables == 1) + { + /* No join, assume reading is done in one 'sweep' */ + *cost= busy_blocks*(DISK_SEEK_BASE_COST + + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); + } + else + { + /* + Possibly this is a join with source table being non-last table, so + assume that disk seeks are random here. + */ + *cost = busy_blocks; + } + } + DBUG_PRINT("info",("returning cost=%g", *cost)); + return 0; +} - RETURN - 0 Cost of this index_merge is less than passed *read_time, - *imerge_rows and *read_time contain new index_merge parameters. - 1 Cost of this index_merge is more than *read_time, - *imerge_rows and *read_time are not modified. - -1 error +/* + Get best plan for a SEL_IMERGE disjunctive expression. + SYNOPSIS + get_best_disjunct_quick() + param + imerge + read_time Don't create scans with cost > read_time + RETURN + read plan + NULL - OOM or no read scan could be built. + NOTES + + index_merge cost is calculated as follows: index_merge_cost = cost(index_reads) + (see #1) cost(rowid_to_row_scan) + (see #2) @@ -1231,158 +1706,247 @@ end: (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)). 3. Cost of Unique use is calculated in Unique::get_use_cost function. -*/ - -static int get_index_merge_params(PARAM *param, key_map& needed_reg, - SEL_IMERGE *imerge, double *read_time, - ha_rows* imerge_rows) -{ - double imerge_cost= 0.0; /* cost of this index_merge */ - bool imerge_too_expensive= false; - double tree_read_time; - ha_rows tree_records; - bool pk_is_clustered= param->table->file->primary_key_is_clustered(); - bool have_cpk_scan= false; - ha_rows records_for_unique= 0; - ha_rows cpk_records= 0; - DBUG_ENTER("get_index_merge_params"); - - /* allocate structs to save construction info */ - imerge->best_keys= - (SEL_ARG***)alloc_root(param->mem_root, - (imerge->trees_next - imerge->trees)* - sizeof(void*)); - /* - PHASE 1: get the best keys to use for this index_merge - */ + ROR-union cost is calculated in the same way index_merge, but instead of + Unique a priority queue is used. + +*/ +static +TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, + double read_time) +{ + SEL_TREE **ptree; + TRP_INDEX_MERGE *imerge_trp= NULL; + uint n_child_scans= imerge->trees_next - imerge->trees; + TRP_RANGE **range_scans; + TRP_RANGE **cur_child; + TRP_RANGE **cpk_scan= NULL; + bool imerge_too_expensive= false; + double imerge_cost= 0.0; + ha_rows cpk_scan_records= 0; + ha_rows non_cpk_scan_records= 0; + bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + bool all_scans_ror_able= true; + bool all_scans_rors= true; + uint unique_calc_buff_size; + TABLE_READ_PLAN **roru_read_plans; + TABLE_READ_PLAN **cur_roru_plan; + double roru_index_costs; + double blocks_in_index_read; + ha_rows roru_total_records; + double roru_intersect_part= 1.0; + double sweep_cost; + DBUG_ENTER("get_best_disjunct_quick"); + DBUG_PRINT("info", ("Full table scan cost =%g", read_time)); + + if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, + sizeof(TRP_RANGE*)* + n_child_scans))) + DBUG_RETURN(NULL); /* - It may be possible to use different keys for index_merge scans, e.g. for - query like - ...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4) - we have to make choice between key1 and key2 for one scan and between - key3, key4 for another. - We assume we'll get the best if we choose the best key read inside each - of the conjuncts. + Collect best 'range' scan for each of disjuncts, and, while doing so, + analyze possibility of ROR scans. Also calculate some values needed by + other parts of the code. */ - for (SEL_TREE **ptree= imerge->trees; + for (ptree= imerge->trees, cur_child= range_scans; ptree != imerge->trees_next; - ptree++) + ptree++, cur_child++) { - SEL_ARG **tree_best_key; - tree_read_time= *read_time; - - if (get_quick_select_params(*ptree, param, needed_reg, true, - &tree_read_time, &tree_records, - &tree_best_key)) + DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map, + "tree in SEL_IMERGE");); + if (!(*cur_child= get_key_scans_params(param, *ptree, true, read_time))) { /* One of index scans in this index_merge is more expensive than entire - table read for another available option. The entire index_merge will - be more expensive then, too. We continue here only to update - SQL_SELECT members. + table read for another available option. The entire index_merge (and + any possible ROR-union) will be more expensive then, too. We continue + here only to update SQL_SELECT members. */ imerge_too_expensive= true; } - if (imerge_too_expensive) continue; - uint keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)]; - imerge->best_keys[ptree - imerge->trees]= tree_best_key; - imerge_cost += tree_read_time; - - if (pk_is_clustered && keynr == param->table->primary_key) + imerge_cost += (*cur_child)->read_cost; + all_scans_ror_able &= ((*ptree)->n_ror_scans > 0); + all_scans_rors &= (*cur_child)->is_ror; + if (pk_is_clustered && + param->real_keynr[(*cur_child)->key_idx] == param->table->primary_key) { - have_cpk_scan= true; - cpk_records= tree_records; + cpk_scan= cur_child; + cpk_scan_records= (*cur_child)->records; } else - records_for_unique += tree_records; - } - DBUG_PRINT("info",("index_merge cost of index reads: %g", imerge_cost)); - - if (imerge_too_expensive) - DBUG_RETURN(1); - - if ((imerge_cost > *read_time) || - ((records_for_unique + cpk_records) >= param->table->file->records) && - *read_time != DBL_MAX) - { - /* Bail out if it is obvious that index_merge would be more expensive */ - DBUG_RETURN(1); + non_cpk_scan_records += (*cur_child)->records; } - - if (have_cpk_scan) + + DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost)); + if (imerge_too_expensive || (imerge_cost > read_time) || + (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) && + read_time != DBL_MAX) { /* - Add one ROWID comparison for each row retrieved on non-CPK scan. - (it is done in QUICK_RANGE_SELECT::row_in_ranges) + Bail out if it is obvious that both index_merge and ROR-union will be + more expensive */ - imerge_cost += records_for_unique / TIME_FOR_COMPARE_ROWID; + DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than " + "full table scan, bailing out")); + DBUG_RETURN(NULL); } - - /* PHASE 2: Calculate cost(rowid_to_row_scan) */ - if (pk_is_clustered) + if (all_scans_rors) { - imerge_cost += - param->table->file->read_time(param->table->primary_key, - records_for_unique, - records_for_unique) - + rows2double(records_for_unique) / TIME_FOR_COMPARE; + roru_read_plans= (TABLE_READ_PLAN**)range_scans; + goto skip_to_ror_scan; } - else - { - double n_blocks= - ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE); - double busy_blocks= - n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, (double) records_for_unique)); - - JOIN *join= param->thd->lex->select_lex.join; - if (!join || join->tables == 1) - imerge_cost += busy_blocks*(DISK_SEEK_BASE_COST + - DISK_SEEK_PROP_COST*n_blocks/busy_blocks); - else - { - /* - It can be a join with source table being non-last table, so assume - that disk seeks are random here. - (TODO it is possible to determine if this is a last table in 'index - checked for each record' join) - */ - imerge_cost += busy_blocks; - } - } - DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost)); - - /* PHASE 3: Add Unique operations cost */ - register uint unique_calc_buff_size= - Unique::get_cost_calc_buff_size(records_for_unique, + blocks_in_index_read= imerge_cost; + if (cpk_scan) + { + /* + Add one ROWID comparison for each row retrieved on non-CPK scan. (it + is done in QUICK_RANGE_SELECT::row_in_ranges) + */ + imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; + } + + /* Calculate cost(rowid_to_row_scan) */ + if (get_sweep_read_cost(param, non_cpk_scan_records, &sweep_cost, + blocks_in_index_read, read_time)) + goto build_ror_index_merge; + imerge_cost += sweep_cost; + DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", + imerge_cost)); + + /* Add Unique operations cost */ + unique_calc_buff_size= + Unique::get_cost_calc_buff_size(non_cpk_scan_records, param->table->file->ref_length, param->thd->variables.sortbuff_size); if (param->imerge_cost_buff_size < unique_calc_buff_size) { if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root, unique_calc_buff_size))) - DBUG_RETURN(1); + DBUG_RETURN(NULL); param->imerge_cost_buff_size= unique_calc_buff_size; } imerge_cost += - Unique::get_use_cost(param->imerge_cost_buff, records_for_unique, + Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records, param->table->file->ref_length, param->thd->variables.sortbuff_size); + DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)", + imerge_cost, read_time)); + if (imerge_cost < read_time) + { + if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE)) + { + imerge_trp->read_cost= imerge_cost; + imerge_trp->records= non_cpk_scan_records + cpk_scan_records; + imerge_trp->records= min(imerge_trp->records, + param->table->file->records); + imerge_trp->range_scans= range_scans; + imerge_trp->range_scans_end= range_scans + n_child_scans; + read_time= imerge_cost; + } + } - DBUG_PRINT("info",("index_merge total cost: %g", imerge_cost)); - if (imerge_cost < *read_time) +build_ror_index_merge: + if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE) + DBUG_RETURN(imerge_trp); + + /* Ok, it is possible to build a ROR-union, try it. */ + bool dummy; + if (!(roru_read_plans= + (TABLE_READ_PLAN**)alloc_root(param->mem_root, + sizeof(TABLE_READ_PLAN*)* + n_child_scans))) + DBUG_RETURN(imerge_trp); +skip_to_ror_scan: + roru_index_costs= 0.0; + roru_total_records= 0; + cur_roru_plan= roru_read_plans; + + /* Find 'best' ROR scan for each of trees in disjunction */ + for (ptree= imerge->trees, cur_child= range_scans; + ptree != imerge->trees_next; + ptree++, cur_child++, cur_roru_plan++) { - *read_time= imerge_cost; - records_for_unique += cpk_records; - *imerge_rows= min(records_for_unique, param->table->file->records); - DBUG_RETURN(0); + /* + Assume the best ROR scan is the one that has cheapest full-row-retrieval + scan cost. + Also accumulate index_only scan costs as we'll need them to calculate + overall index_intersection cost. + */ + double cost; + if ((*cur_child)->is_ror) + { + /* Ok, we have index_only cost, now get full rows scan cost */ + cost= param->table->file-> + read_time(param->real_keynr[(*cur_child)->key_idx], 1, + (*cur_child)->records) + + rows2double((*cur_child)->records) / TIME_FOR_COMPARE; + } + else + cost= read_time; + + TABLE_READ_PLAN *prev_plan= *cur_child; + if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, false, cost, + &dummy))) + { + if (prev_plan->is_ror) + *cur_roru_plan= prev_plan; + else + DBUG_RETURN(imerge_trp); + roru_index_costs += (*cur_roru_plan)->read_cost; + } + else + roru_index_costs += + ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs; + roru_total_records += (*cur_roru_plan)->records; + roru_intersect_part *= (*cur_roru_plan)->records / + param->table->file->records; } - DBUG_RETURN(1); + + /* + rows to retrieve= + SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows). + This is valid because index_merge constructuion guarantees that conditions + in disjunction do not share key parts. + */ + roru_total_records -= (ha_rows)(roru_intersect_part* + param->table->file->records); + /* ok, got a ROR read plan for each of the disjuncts + Calculate cost: + cost(index_union_scan(scan_1, ... scan_n)) = + SUM_i(cost_of_index_only_scan(scan_i)) + + queue_use_cost(rowid_len, n) + + cost_of_row_retrieval + See get_merge_buffers_cost function for queue_use_cost formula derivation. + */ + + if (get_sweep_read_cost(param, roru_total_records, &sweep_cost, + roru_index_costs, read_time)) + DBUG_RETURN(NULL); + double roru_total_cost; + roru_total_cost= roru_index_costs + + rows2double(roru_total_records)*log(n_child_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2) + + sweep_cost; + DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost, + n_child_scans)); + TRP_ROR_UNION* roru; + if (roru_total_cost < read_time) + { + if ((roru= new (param->mem_root) TRP_ROR_UNION)) + { + roru->first_ror= roru_read_plans; + roru->last_ror= roru_read_plans + n_child_scans; + roru->read_cost= roru_total_cost; + roru->records= roru_total_records; + DBUG_RETURN(roru); + } + } + DBUG_RETURN(imerge_trp); } @@ -1401,7 +1965,7 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg, key blocks are half full (normally things are much better). */ -inline double get_index_only_read_time(PARAM* param, ha_rows records, +inline double get_index_only_read_time(const PARAM* param, ha_rows records, int keynr) { double read_time; @@ -1413,39 +1977,693 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records, return read_time; } +typedef struct st_ror_scan_info +{ + uint idx; /* # of used key in param->keys */ + uint keynr; /* # of used key in table */ + ha_rows records; + SEL_ARG *sel_arg; + /* Fields used in the query and covered by this ROR scan */ + MY_BITMAP covered_fields; + uint used_fields_covered; + int key_rec_length; /* length of key record with rowid */ + + /* + Array of + #rows(keypart_1=c1 AND ... AND key_part_i=c_i) / + #rows(keypart_1=c1 AND ... AND key_part_{i+1}=c_{i+1}) values + */ + double *key_part_rows; + double index_read_cost; + uint first_uncovered_field; + uint key_components; +}ROR_SCAN_INFO; + + +/* + Create ROR_SCAN_INFO* structure for condition sel_arg on key idx. + SYNOPSIS + make_ror_scan() + param + idx index of key in param->keys + RETURN + NULL - OOM. +*/ + +static +ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) +{ + ROR_SCAN_INFO *ror_scan; + uchar *bitmap_buf; + uint keynr; + DBUG_ENTER("make_ror_scan"); + if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO)))) + DBUG_RETURN(NULL); + + ror_scan->idx= idx; + ror_scan->keynr= keynr= param->real_keynr[idx]; + ror_scan->key_rec_length= param->table->key_info[keynr].key_length + + param->table->file->ref_length; + ror_scan->sel_arg= sel_arg; + ror_scan->records= param->table->quick_rows[keynr]; + + if (!(bitmap_buf= (uchar*)alloc_root(param->mem_root, + param->fields_bitmap_size))) + DBUG_RETURN(NULL); + + if (bitmap_init(&ror_scan->covered_fields, bitmap_buf, + param->fields_bitmap_size*8, false)) + DBUG_RETURN(NULL); + bitmap_clear_all(&ror_scan->covered_fields); + if (!(ror_scan->key_part_rows= + (double*)alloc_root(param->mem_root, sizeof(double)* + param->table->key_info[keynr].key_parts))) + DBUG_RETURN(NULL); + + KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part; + KEY_PART_INFO *key_part_end= key_part + + param->table->key_info[keynr].key_parts; + uint n_used_covered= 0; + for (;key_part != key_part_end; ++key_part) + { + if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr)) + { + n_used_covered++; + bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr); + } + } + ror_scan->index_read_cost= + get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr], + ror_scan->keynr); + /* + Calculate # rows estimates for + (key_part1=c1) + (key_part1=c1 AND key_part2=c2) + ...and so on + */ + char key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* values of current key */ + char *key_ptr= key_val; + ha_rows records; + ha_rows prev_records= param->table->file->records; + double *rows_diff= ror_scan->key_part_rows; + key_part= param->table->key_info[keynr].key_part; + SEL_ARG *arg= ror_scan->sel_arg; + /* + We have #rows estimate already for first key part, so do first loop + iteration separately: + */ + arg->store_min(key_part->length, &key_ptr, 0); + prev_records= param->table->quick_rows[ror_scan->keynr]; + *(rows_diff++)= rows2double(prev_records) / param->table->file->records; + arg=arg->next_key_part; + for(; arg; ++key_part, arg= arg->next_key_part) + { + arg->store_min(key_part->length, &key_ptr, 0); + records= + param->table->file->records_in_range(ror_scan->keynr, + (byte*)key_val, key_ptr - key_val, + HA_READ_KEY_EXACT, + (byte*)key_val, key_ptr - key_val, + HA_READ_AFTER_KEY); + if (records == HA_POS_ERROR) + return NULL; /* shouldn't happen actually */ + *(rows_diff++)= rows2double(records) / rows2double(prev_records); + prev_records= records; + } + DBUG_RETURN(ror_scan); +} + + +/* + Order ROR_SCAN_INFO** by + E(#records_matched) * key_record_length +*/ +int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + double val1= rows2double((*a)->records) * (*a)->key_rec_length; + double val2= rows2double((*b)->records) * (*b)->key_rec_length; + return (val1 < val2)? -1: (val1 == val2)? 0 : 1; +} + +/* + Order ROR_SCAN_INFO** by + (#covered fields in F desc, + #components asc, + number of first not covered component asc) +*/ +int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b) +{ + if ((*a)->used_fields_covered > (*b)->used_fields_covered) + return -1; + if ((*a)->used_fields_covered < (*b)->used_fields_covered) + return 1; + if ((*a)->key_components < (*b)->key_components) + return -1; + if ((*a)->key_components > (*b)->key_components) + return 1; + if ((*a)->first_uncovered_field < (*b)->first_uncovered_field) + return -1; + if ((*a)->first_uncovered_field > (*b)->first_uncovered_field) + return 1; + return 0; +} + +/* Auxiliary structure for incremental ROR-intersection creation */ +typedef struct +{ + const PARAM *param; + MY_BITMAP covered_fields; /* union of fields covered by all scans */ + + /* true if covered_fields is a superset of needed_fields */ + bool is_covering; + + double index_scan_costs; /* SUM(cost of 'index-only' scans) */ + double total_cost; + /* + Fraction of table records that satisfies conditions of all scans. + This is the number of full records that will be retrieved if a + non-index_only index intersection will be employed. + */ + double records_fract; + ha_rows index_records; /* sum(#records to look in indexes) */ +}ROR_INTERSECT_INFO; + + +/* Allocate a ROR_INTERSECT and initialize it to contain zero scans */ +ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param, bool is_index_only) +{ + ROR_INTERSECT_INFO *info; + uchar* buf; + if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root, + sizeof(ROR_INTERSECT_INFO)))) + return NULL; + info->param= param; + info->is_covering= is_index_only; + info->index_scan_costs= 0.0f; + info->records_fract= 1.0f; + + if (!(buf= (uchar*)alloc_root(param->mem_root, param->fields_bitmap_size))) + return NULL; + if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8, + false)) + return NULL; + bitmap_clear_all(&info->covered_fields); + return info; +} + +/* + Check if it makes sense to add a ROR scan to ROR-intersection, and if yes + update parameters of ROR-intersection, including its cost. + + RETURN + true ROR scan added to ROR-intersection, cost updated. + false It doesn't make sense to add this ROR scan to this ROR-intersection. + + NOTE + Adding a ROR scan to ROR-intersect "makes sense" iff selectivt + + Cost of ROR-intersection is calulated as follows: + cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval + + if (union of indexes used covers all needed fields) + cost_of_full_rows_retrieval= 0; + else + { + cost_of_full_rows_retrieval= + cost_of_sweep_read(E(rows_to_retrive), rows_in_table); + } + + E(rows_to_retrive) is calulated as follows: + Suppose we have a condition on several keys + cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key + k_21=c_21 AND k_22=c_22 AND ... // parts of second key + ... + k_n1=c_n1 AND k_n3=c_n3 AND ... (1) + + where k_ij may be the same as any k_pq (i.e. keys may have common parts). + + A full row is retrieved iff entire cond holds. + + The recursive procedure for finding P(cond) is as follows: + + First step: + Pick 1st part of 1st key and break conjunction (1) into two parts: + cond= (k_11=c_11 AND R) + + Here R may still contain condition(s) equivalent to k_11=c_11. + Nevertheless, the following holds: + + P(k_11=c_11 AND R) = P(k_11=c_11) * P(R|k_11=c_11). + + Mark k_11 as fixed field (and satisfied condition) F, save P(F), + save R to be cond and proceed to recursion step. + + Recursion step: + We have set of fixed fields/satisfied conditions) F, probability P(F), + and remaining conjunction R + Pick next key part on current key and its condition "k_ij=c_ij". + We will add "k_ij=c_ij" into F and update P(F). + Lets denote k_ij as t, R = t AND R1, where i1 may still contain t. Then + + P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R|(t AND F)) (2) + + (where '|' mean conditional probability, not "or") + + Consider the first multiplier in (2). One of the following holds: + a) F contains condition on field used in t (i.e. t AND F = F). + Then P(t|F) = 1 + + b) F doesn't contain condition on field used in t. Then F and t are + considered independent. + + P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = + = P(t|fields_before_t_in_key). + + P(t|fields_before_t_in_key)= #distinct_values(fields_before_t_in_key) / + #distinct_values(fields_before_t_in_key, t) + + The second multiplier is calculated by applying this step recusively. + + This function applies recursion steps for all fixed key members of + one key, accumulating sets of covered fields and + The very first step described is done as recursion step with + P(fixed fields)=1 and empty set of fixed fields. +*/ + +bool ror_intersect_add(ROR_INTERSECT_INFO *info, ROR_SCAN_INFO* ror_scan) +{ + int i; + SEL_ARG *sel_arg; + KEY_PART_INFO *key_part= + info->param->table->key_info[ror_scan->keynr].key_part; + double selectivity_mult= 1.0; + DBUG_ENTER("ror_intersect_add"); + DBUG_PRINT("info", ("Current selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("Adding scan on %s", + info->param->table->key_info[ror_scan->keynr].name)); + for(i= 0, sel_arg= ror_scan->sel_arg; sel_arg; + i++, sel_arg= sel_arg->next_key_part) + { + if (!bitmap_is_set(&info->covered_fields, (key_part + i)->fieldnr)) + { + /* + P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) = + = P(t|fields_before_t_in_key). + */ + selectivity_mult *= ror_scan->key_part_rows[i]; + } + } + if (selectivity_mult == 1.0) + { + /* Don't add this scan if it doesn't improve selectivity. */ + DBUG_PRINT("info", ("This scan doesn't improve selectivity.")); + DBUG_RETURN(false); + } + info->records_fract *= selectivity_mult; + + bitmap_union(&info->covered_fields, &ror_scan->covered_fields); + + ha_rows scan_records= info->param->table->quick_rows[ror_scan->keynr]; + info->index_scan_costs += ror_scan->index_read_cost; + + if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields, + &info->covered_fields)) + { + DBUG_PRINT("info", ("ROR-intersect is covering now")); + /* ROR-intersect became covering */ + info->is_covering= true; + } + + info->index_records += scan_records; + info->total_cost= info->index_scan_costs; + if (!info->is_covering) + { + ha_rows table_recs= info->param->table->file->records; + double sweep_cost; + get_sweep_read_cost(info->param, + (ha_rows)(table_recs*info->records_fract), + &sweep_cost, info->index_scan_costs, DBL_MAX); + + info->total_cost += sweep_cost; + } + DBUG_PRINT("info", ("New selectivity= %g", info->records_fract)); + DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost, + info->is_covering?"" : "non-")); + DBUG_RETURN(true); +} + +/* + Get best ROR-intersection plan using non-covering ROR-intersection search + algorithm. The returned plan may be covering. + + SYNOPSIS + get_best_ror_intersect() + param + tree + force_index_only If true, don't calculate costs of full rows retrieval. + read_time Do not return read plans with cost > read_time. + are_all_covering [out] set to true if union of all scans covers all + fields needed by the query (and it is possible to build + a covering ROR-intersection) + RETURN + ROR-intersection table read plan + NULL if OOM or no plan found. + + NOTE + get_key_scans_params must be called before for the same SEL_TREE before + this function can be called. + + The approximate best non-covering plan search algorithm is as follows: + + find_min_ror_intersection_scan() + { + R= select all ROR scans; + order R by (E(#records_matched) * key_record_length). + + S= first(R); -- set of scans that will be used for ROR-intersection + R= R-first(S); + min_cost= cost(S); + min_scan= make_scan(S); + while (R is not empty) + { + if (!selectivity(S + first(R) < selectivity(S))) + continue; + + S= S + first(R); + R= R - first(R); + if (cost(S) < min_cost) + { + min_cost= cost(S); + min_scan= make_scan(S); + } + } + return min_scan; + } + + See ror_intersect_add function for ROR intersection costs. + + Special handling for Clustered PK scans + Clustered PK contains all table fields, so using it as a regular scan in + index intersection doesn't make sense: a range scan on CPK will be less + expensive in this case. + Clustered PK scan has special handling in ROR-intersection: it is not used + to retrieve rows, instead its condition is used to filter row references + we get from scans on other keys. +*/ + +static +TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, + bool force_index_only, + double read_time, + bool *are_all_covering) +{ + uint idx; + double min_cost= read_time; + DBUG_ENTER("get_best_ror_intersect"); + + if (tree->n_ror_scans < 2) + DBUG_RETURN(NULL); + + /* Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of them */ + ROR_SCAN_INFO **cur_ror_scan; + if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + param->keys))) + return NULL; + + for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++) + { + if (!tree->ror_scans_map.is_set(idx)) + continue; + if (!(*cur_ror_scan= make_ror_scan(param, idx, tree->keys[idx]))) + return NULL; + cur_ror_scan++; + } + + tree->ror_scans_end= cur_ror_scan; + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original", + tree->ror_scans, + tree->ror_scans_end);); + /* + Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized + ROR_SCAN_INFOs. + Get a minimal key scan using an approximate algorithm. + */ + qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info); + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered", + tree->ror_scans, + tree->ror_scans_end);); + + ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */ + ROR_SCAN_INFO **intersect_scans_end; + if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + tree->n_ror_scans))) + return NULL; + intersect_scans_end= intersect_scans; + + /* Create and incrementally update ROR intersection. */ + ROR_INTERSECT_INFO *intersect; + if (!(intersect= ror_intersect_init(param, false))) + return NULL; + + /* [intersect_scans, intersect_scans_best) will hold the best combination */ + ROR_SCAN_INFO **intersect_scans_best= NULL; + ha_rows best_rows; + bool is_best_covering; + double best_index_scan_costs; + LINT_INIT(best_rows); /* protected by intersect_scans_best */ + LINT_INIT(is_best_covering); + LINT_INIT(best_index_scan_costs); + + cur_ror_scan= tree->ror_scans; + while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering) + { + /* S= S + first(R); */ + if (ror_intersect_add(intersect, *cur_ror_scan)) + *(intersect_scans_end++)= *cur_ror_scan; + /* R= R-first(R); */ + cur_ror_scan++; + + if (intersect->total_cost < min_cost) + { + /* Local minimum found, save it */ + min_cost= intersect->total_cost; + best_rows= (ha_rows)(intersect->records_fract* + rows2double(param->table->file->records)); + is_best_covering= intersect->is_covering; + intersect_scans_best= intersect_scans_end; + best_index_scan_costs= intersect->index_scan_costs; + } + } + + /* Ok, return ROR-intersect plan if we have found one */ + *are_all_covering= intersect->is_covering; + uint best_num= intersect_scans_best - intersect_scans; + TRP_ROR_INTERSECT *trp= NULL; + if (intersect_scans_best && best_num > 1) + { + DBUG_EXECUTE("info",print_ror_scans_arr(param->table, + "used for ROR-intersect", + intersect_scans, + intersect_scans_best);); + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + if (!(trp->first_scan= + (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)*best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= is_best_covering; + trp->read_cost= min_cost; + trp->records= best_rows? best_rows : 1; + trp->index_scan_costs= best_index_scan_costs; + } + DBUG_RETURN(trp); +} + + +/* + Get best covering ROR-intersection. + SYNOPSIS + get_best_covering_ror_intersect() + param + tree SEL_TREE + read_time Dont return table read plans with cost > read_time. + + RETURN + Best covering ROR-intersection plan + NULL if no plan found. + + NOTE + get_best_ror_intersect must be called for a tree before calling this + function for it. + This function invalidates tree->ror_scans member values. + + The following approximate algorithm is used: + I=set of all covering indexes + F=set of all fields to cover + S={} + + do { + Order I by (#covered fields in F desc, + #components asc, + number of first not covered component asc); + F=F-covered by first(I); + S=S+first(I); + I=I-first(I); + } while F is not empty. +*/ + +static +TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, + SEL_TREE *tree, + double read_time) +{ + ROR_SCAN_INFO **ror_scan_mark; + ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end; + DBUG_ENTER("get_best_covering_ror_intersect"); + uint nbits= param->fields_bitmap_size*8; + + for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan) + (*scan)->key_components= + param->table->key_info[(*scan)->keynr].key_parts; + + /* + Run covering-ROR-search algorithm. + Assume set I is [ror_scan .. ror_scans_end) + */ + + /*I=set of all covering indexes */ + ror_scan_mark= tree->ror_scans; + + uchar buf[MAX_KEY/8+1]; + MY_BITMAP covered_fields; + if (bitmap_init(&covered_fields, buf, nbits, false)) + DBUG_RETURN(0); + bitmap_clear_all(&covered_fields); + + double total_cost= 0.0f; + ha_rows records=0; + bool all_covered; + + /* Start will all scans and remove one by one until */ + DBUG_PRINT("info", ("Building covering ROR-intersection")); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "building covering ROR-I", + ror_scan_mark, ror_scans_end);); + do { + /* + Update changed sorting info: + #covered fields, + number of first not covered component + Calculate and save these values for each of remaining scans. + */ + for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan) + { + bitmap_subtract(&(*scan)->covered_fields, &covered_fields); + (*scan)->used_fields_covered= + bitmap_bits_set(&(*scan)->covered_fields); + (*scan)->first_uncovered_field= + bitmap_get_first(&(*scan)->covered_fields); + } + + qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*), + (qsort_cmp)cmp_ror_scan_info_covering); + + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "remaining scans", + ror_scan_mark, ror_scans_end);); + + /* I=I-first(I) */ + total_cost += (*ror_scan_mark)->index_read_cost; + records += (*ror_scan_mark)->records; + DBUG_PRINT("info", ("Adding scan on %s", + param->table->key_info[(*ror_scan_mark)->keynr].name)); + if (total_cost > read_time) + DBUG_RETURN(NULL); + /* F=F-covered by first(I) */ + bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields); + all_covered= bitmap_is_subset(¶m->needed_fields, &covered_fields); + } while (!all_covered && (++ror_scan_mark < ror_scans_end)); + + if (!all_covered) + DBUG_RETURN(NULL); /* should not happen actually */ + + /* + Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with + cost total_cost. + */ + DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost)); + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating covering ROR-intersect", + tree->ror_scans, ror_scan_mark);); + + /* Add priority queue use cost. */ + total_cost += rows2double(records)*log(ror_scan_mark - tree->ror_scans) / + (TIME_FOR_COMPARE_ROWID * M_LN2); + DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost)); + + if (total_cost > read_time) + DBUG_RETURN(NULL); + + TRP_ROR_INTERSECT *trp; + if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) + DBUG_RETURN(trp); + uint best_num= (ror_scan_mark - tree->ror_scans); + if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root, + sizeof(ROR_SCAN_INFO*)* + best_num))) + DBUG_RETURN(NULL); + memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*)); + trp->last_scan= trp->first_scan + best_num; + trp->is_covering= true; + trp->read_cost= total_cost; + trp->records= records; + + DBUG_RETURN(trp); +} + /* - Calculate quick range select read time, # of records, and best key to use - without constructing QUICK_RANGE_SELECT object. + Get best "range" table read plan for given SEL_TREE. + Also update PARAM members and store ROR scans info in the SEL_TREE. SYNOPSIS get_quick_select_params - tree in make range select for this SEL_TREE - param in parameters from test_quick_select - needed_reg in/out other table data needed by this quick_select - index_read_must_be_used if true, assume 'index only' option will be set + param parameters from test_quick_select + tree make range select for this SEL_TREE + index_read_must_be_used if true, assume 'index only' option will be set (except for clustered PK indexes) - read_time out read time estimate - records out # of records estimate - key_to_read out SEL_ARG to be used for creating quick select + read_time don't create read plans with cost > read_time. + RETURN + Best range read plan + NULL if no plan found or error occurred */ -static int get_quick_select_params(SEL_TREE *tree, PARAM *param, - key_map& needed_reg, - bool index_read_must_be_used, - double *read_time, ha_rows *records, - SEL_ARG ***key_to_read) +static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, + bool index_read_must_be_used, + double read_time) { int idx; - int result = 1; + SEL_ARG **key,**end, **key_to_read= NULL; + ha_rows best_records; + TRP_RANGE* read_plan= NULL; bool pk_is_clustered= param->table->file->primary_key_is_clustered(); + DBUG_ENTER("get_key_scans_params"); + LINT_INIT(best_records); /* protected by key_to_read */ /* Note that there may be trees that have type SEL_TREE::KEY but contain no key reads at all, e.g. tree for expression "key1 is not null" where key1 is defined as "not null". - */ - SEL_ARG **key,**end; - - for (idx= 0,key=tree->keys, end=key+param->keys ; + */ + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map, + "tree scans");); + tree->ror_scans_map.clear_all(); + tree->n_ror_scans= 0; + for (idx= 0,key=tree->keys, end=key+param->keys; key != end ; key++,idx++) { @@ -1456,19 +2674,24 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param, uint keynr= param->real_keynr[idx]; if ((*key)->type == SEL_ARG::MAYBE_KEY || (*key)->maybe_flag) - needed_reg.set_bit(keynr); + param->needed_reg->set_bit(keynr); bool read_index_only= index_read_must_be_used? true : (bool)param->table->used_keys.is_set(keynr); - found_records=check_quick_select(param, idx, *key); + found_records= check_quick_select(param, idx, *key); + if (param->is_ror_scan) + { + tree->n_ror_scans++; + tree->ror_scans_map.set_bit(idx); + } if (found_records != HA_POS_ERROR && found_records > 2 && read_index_only && (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) && !(pk_is_clustered && keynr == param->table->primary_key)) { /* We can resolve this by only reading through this key. */ - found_read_time=get_index_only_read_time(param, found_records, keynr); + found_read_time= get_index_only_read_time(param,found_records,keynr); } else { @@ -1481,18 +2704,127 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param, found_records)+ (double) found_records / TIME_FOR_COMPARE); } - if (*read_time > found_read_time && found_records != HA_POS_ERROR) + if (read_time > found_read_time && found_records != HA_POS_ERROR + /*|| read_time == DBL_MAX*/ ) + { + read_time= found_read_time; + best_records= found_records; + key_to_read= key; + } + + } + } + + DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map, + "ROR scans");); + if (key_to_read) + { + idx= key_to_read - tree->keys; + if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx))) + { + read_plan->records= best_records; + read_plan->is_ror= tree->ror_scans_map.is_set(idx); + read_plan->read_cost= read_time; + DBUG_PRINT("info",("Returning range plan for key %s, cost %g", + param->table->key_info[param->real_keynr[idx]].name, + read_plan->read_cost)); + } + } + else + DBUG_PRINT("info", ("No 'range' table read plan found")); + + DBUG_RETURN(read_plan); +} + + +QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_INDEX_MERGE_SELECT *quick_imerge; + QUICK_RANGE_SELECT *quick; + /* index_merge always retrieves full rows, ignore retrieve_full_rows */ + if (!(quick_imerge= new QUICK_INDEX_MERGE_SELECT(param->thd, param->table))) + return NULL; + + quick_imerge->records= records; + quick_imerge->read_time= read_cost; + for(TRP_RANGE **range_scan= range_scans; range_scan != range_scans_end; + range_scan++) + { + if (!(quick= (QUICK_RANGE_SELECT*) + ((*range_scan)->make_quick(param, false, &quick_imerge->alloc)))|| + quick_imerge->push_quick_back(quick)) + { + delete quick; + delete quick_imerge; + return NULL; + } + } + return quick_imerge; +} + +QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_INTERSECT_SELECT *quick_intrsect; + QUICK_RANGE_SELECT *quick; + DBUG_ENTER("TRP_ROR_INTERSECT::make_quick"); + MEM_ROOT *alloc; + + if ((quick_intrsect= + new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table, + retrieve_full_rows? (!is_covering):false, + parent_alloc))) + { + DBUG_EXECUTE("info", print_ror_scans_arr(param->table, + "creating ROR-intersect", + first_scan, last_scan);); + alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc; + for(; first_scan != last_scan;++first_scan) + { + if (!(quick= get_quick_select(param, (*first_scan)->idx, + (*first_scan)->sel_arg, alloc)) || + quick_intrsect->push_quick_back(quick)) { - *read_time= found_read_time; - *records= found_records; - *key_to_read= key; - result = 0; + delete quick_intrsect; + DBUG_RETURN(NULL); } } + quick_intrsect->records= records; + quick_intrsect->read_time= read_cost; } - return result; + DBUG_RETURN(quick_intrsect); } +QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc) +{ + QUICK_ROR_UNION_SELECT *quick_roru; + TABLE_READ_PLAN **scan; + QUICK_SELECT_I *quick; + DBUG_ENTER("TRP_ROR_UNION::make_quick"); + /* + It is currently impossible to construct a ROR-union that will + not retrieve full rows, ingore retrieve_full_rows. + */ + if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table))) + { + for(scan= first_ror; scan != last_ror; scan++) + { + if (!(quick= (*scan)->make_quick(param, false, &quick_roru->alloc)) || + quick_roru->push_quick_back(quick)) + DBUG_RETURN(NULL); + } + quick_roru->records= records; + quick_roru->read_time= read_cost; + } + DBUG_RETURN(quick_roru); +} + +/****************************************************************************/ /* make a select tree of all keys in condition */ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond) @@ -2975,34 +4307,88 @@ void SEL_ARG::test_use_count(SEL_ARG *root) /***************************************************************************** ** Check how many records we will find by using the found tree +** NOTE +** param->table->quick_* and param->range_count (and maybe others) are +** updated with data of given key scan. *****************************************************************************/ static ha_rows check_quick_select(PARAM *param,uint idx,SEL_ARG *tree) { ha_rows records; + bool cpk_scan; + uint key; DBUG_ENTER("check_quick_select"); if (!tree) DBUG_RETURN(HA_POS_ERROR); // Can't use it param->max_key_part=0; param->range_count=0; + key= param->real_keynr[idx]; + if (tree->type == SEL_ARG::IMPOSSIBLE) DBUG_RETURN(0L); // Impossible select. return if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0) DBUG_RETURN(HA_POS_ERROR); // Don't use tree + + enum ha_key_alg key_alg= param->table->key_info[key].algorithm; + if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF)) + { + /* Records are not ordered by rowid for other types of indexes. */ + param->is_ror_scan= false; + cpk_scan= false; + } + else + { + /* + Clustered PK scan is a special case, check_quick_keys doesn't recognize + CPK scans as ROR scans (while actually any CPK scan is a ROR scan). + */ + cpk_scan= (param->table->primary_key == param->real_keynr[idx]) && + param->table->file->primary_key_is_clustered(); + param->is_ror_scan= !cpk_scan; + } + records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0); if (records != HA_POS_ERROR) - { - uint key=param->real_keynr[idx]; + { param->table->quick_keys.set_bit(key); param->table->quick_rows[key]=records; param->table->quick_key_parts[key]=param->max_key_part+1; + + if (cpk_scan) + param->is_ror_scan= true; } DBUG_RETURN(records); } +/* + SYNOPSIS + check_quick_keys() + param + idx key to use, its number in list of used keys (that is, + param->real_keynr[idx] holds the key number in table) + + key_tree SEL_ARG tree which cost is calculated. + min_key buffer with min key value tuple + min_key_flag + max_key buffer with max key value tuple + max_key_flag + + NOTE + The function does the recursive descent on the tree via left, right, and + next_key_part edges. The #rows estimates are calculated at the leaf nodes. + + param->min_key and param->max_key are used to hold key segment values. + + The side effects are: + param->max_key_part is updated to hold the maximum number of key parts used + in scan minus 1. + param->range_count is updated. + param->is_ror_scan is updated. +*/ + static ha_rows check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, char *min_key,uint min_key_flag, char *max_key, @@ -3013,6 +4399,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, param->max_key_part=max(param->max_key_part,key_tree->part); if (key_tree->left != &null_element) { + param->is_ror_scan= false; records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag, max_key,max_key_flag); if (records == HA_POS_ERROR) // Impossible @@ -3040,6 +4427,9 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp_max_key, max_key_flag | key_tree->max_flag); goto end; // Ugly, but efficient } + else + param->is_ror_scan= false; + tmp_min_flag=key_tree->min_flag; tmp_max_flag=key_tree->max_flag; if (!tmp_min_flag) @@ -3068,6 +4458,15 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, tmp=1; // Max one record else { + if (param->is_ror_scan) + { + if (!(min_key_length == max_key_length && + !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) && + !key_tree->min_flag && !key_tree->max_flag && + is_key_scan_ror(param, keynr, key_tree->part + 1))) + param->is_ror_scan= false; + } + if (tmp_min_flag & GEOM_FLAG) { tmp= param->table->file-> @@ -3098,6 +4497,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, records+=tmp; if (key_tree->right != &null_element) { + param->is_ror_scan= false; tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag, max_key,max_key_flag); if (tmp == HA_POS_ERROR) @@ -3107,12 +4507,50 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree, return records; } +/* + Check if key scan on key keynr with first nparts key parts fixed is a + ROR scan. This function doesn't handle clustered PK scans or HASH index + scans. +*/ +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) +{ + KEY *table_key= param->table->key_info + keynr; + KEY_PART_INFO *key_part= table_key->key_part + nparts; + KEY_PART_INFO *key_part_end= table_key->key_part + + table_key->key_parts; + + if (key_part == key_part_end) + return true; + uint pk_number= param->table->primary_key; + if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY) + return false; + + KEY_PART_INFO *pk_part= param->table->key_info[pk_number].key_part; + KEY_PART_INFO *pk_part_end= pk_part + + param->table->key_info[pk_number].key_parts; + for(;(key_part!=key_part_end) && (pk_part != pk_part_end); + ++key_part, ++pk_part) + { + if (key_part->field != pk_part->field) + return false; + } + return (key_part == key_part_end); +} -/**************************************************************************** -** change a tree to a structure to be used by quick_select -** This uses it's own malloc tree -** The caller should call QUICK_SELCT::init for returned quick select -****************************************************************************/ + +/* + Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key. + This uses it's own malloc tree. + SYNOPSIS + get_quick_select() + param + idx index of used key in param->key. + key_tree SEL_ARG tree for the used key + parent_alloc if not NULL, use it to allocate memory for + quick select data. Otherwise use quick->alloc. + + The caller should call QUICK_SELCT::init for returned quick select +*/ QUICK_RANGE_SELECT * get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree, MEM_ROOT *parent_alloc) @@ -3292,6 +4730,47 @@ static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length) return 0; } +bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields) +{ + return check_if_key_used(head, index, *fields); +} + +bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + if (check_if_key_used(head, quick->index, *fields)) + return 1; + } + return 0; +} + +bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields) +{ + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (quick->check_if_keys_used(fields)) + return 1; + } + return 0; +} + /**************************************************************************** ** Create a QUICK RANGE based on a key ****************************************************************************/ @@ -3351,8 +4830,6 @@ err: } -#define MEM_STRIP_BUF_SIZE thd->variables.sortbuff_size - /* Fetch all row ids into unique. @@ -3386,9 +4863,9 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique() cur_quick_select->init(); - unique= new Unique(refposcmp2, (void *) &head->file->ref_length, + unique= new Unique(refpos_order_cmp, (void *)head->file, head->file->ref_length, - MEM_STRIP_BUF_SIZE); + thd->variables.sortbuff_size); if (!unique) DBUG_RETURN(1); do @@ -3421,8 +4898,7 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique() continue; cur_quick_select->file->position(cur_quick_select->record); - result= unique->unique_add((char*)cur_quick_select->file->ref); - + result= unique->unique_add((char*)cur_quick_select->file->ref); if (result) DBUG_RETURN(1); @@ -3475,6 +4951,143 @@ int QUICK_INDEX_MERGE_SELECT::get_next() DBUG_RETURN(result); } + +/* + NOTES + Invariant on enter/exit: all intersected selects have retrieved index + records with rowid <= some_rowid_val and no intersected select has + retrieved any index records with rowid > some_rowid_val. + We start fresh and loop until we have retrieved the same rowid in each of + the key scans or we got an error. + + If a Clustered PK scan is present, it is used only to check if row + satisfies its conditions (and never used for row retrieval). +*/ + +int QUICK_ROR_INTERSECT_SELECT::get_next() +{ + List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects); + QUICK_RANGE_SELECT* quick; + int error, cmp; + uint last_rowid_count=0; + DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next"); + + /* Get a rowid for first quick and save it as a 'candidate' */ + quick= quick_it++; + if (cpk_quick) + { + do { + error= quick->get_next(); + }while (!error && !cpk_quick->row_in_ranges()); + } + else + error= quick->get_next(); + + if (error) + DBUG_RETURN(error); + + quick->file->position(quick->record); + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + + while (last_rowid_count < quick_selects.elements) + { + if (!(quick= quick_it++)) + { + quick_it.rewind(); + quick= quick_it++; + } + + do { + if ((error= quick->get_next())) + DBUG_RETURN(error); + quick->file->position(quick->record); + cmp= head->file->cmp_ref(quick->file->ref, last_rowid); + } while (cmp < 0); + + /* Ok, current select 'caught up' and returned ref >= cur_ref */ + if (cmp > 0) + { + /* Found a row with ref > cur_ref. Make it a new 'candidate' */ + if (cpk_quick) + { + while (!cpk_quick->row_in_ranges()) + { + if ((error= quick->get_next())) + DBUG_RETURN(error); + } + } + memcpy(last_rowid, quick->file->ref, head->file->ref_length); + last_rowid_count= 1; + } + else + { + /* current 'candidate' row confirmed by this select */ + last_rowid_count++; + } + } + + /* We get here iff we got the same row ref in all scans. */ + if (need_to_fetch_row) + error= head->file->rnd_pos(head->record[0], last_rowid); + DBUG_RETURN(error); +} + + +/* + NOTES + Enter/exit invariant: + For each quick select in the queue a {key,rowid} tuple has been + retrieved but the corresponding row hasn't been passed to output. +*/ + +int QUICK_ROR_UNION_SELECT::get_next() +{ + int error, dup_row; + QUICK_SELECT_I *quick; + byte *tmp; + DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next"); + + do + { + if (!queue.elements) + DBUG_RETURN(HA_ERR_END_OF_FILE); + /* Ok, we have a queue with > 1 scans */ + + quick= (QUICK_SELECT_I*)queue_top(&queue); + memcpy(cur_rowid, quick->last_rowid, rowid_length); + + /* put into queue rowid from the same stream as top element */ + if ((error= quick->get_next())) + { + if (error != HA_ERR_END_OF_FILE) + DBUG_RETURN(error); + queue_remove(&queue, 0); + } + else + { + quick->save_last_pos(); + queue_replaced(&queue); + } + + if (!have_prev_rowid) + { + /* No rows have been returned yet */ + dup_row= false; + have_prev_rowid= true; + } + else + dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid); + }while (dup_row); + + tmp= cur_rowid; + cur_rowid= prev_rowid; + prev_rowid= tmp; + + error= head->file->rnd_pos(quick->record, prev_rowid); + DBUG_RETURN(error); +} + /* get next possible record using quick-struct */ int QUICK_RANGE_SELECT::get_next() @@ -3883,6 +5496,156 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, #endif +void QUICK_RANGE_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + KEY *key_info= head->key_info + index; + key_names->append(key_info->name); + length= longlong2str(max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); +} + +void QUICK_INDEX_MERGE_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (key_names->length()) + key_names->append(','); + key_names->append(key_info->name); + + if (used_lengths->length()) + used_lengths->append(','); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + if (pk_quick_select) + { + KEY *key_info= head->key_info + pk_quick_select->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(pk_quick_select->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_INTERSECT_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + char buf[64]; + uint length; + bool first= true; + QUICK_RANGE_SELECT *quick; + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + while ((quick= it++)) + { + KEY *key_info= head->key_info + quick->index; + if (!first) + key_names->append(','); + key_names->append(key_info->name); + + if (first) + first= false; + else + used_lengths->append(','); + length= longlong2str(quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(buf, length); + } + if (cpk_quick) + { + KEY *key_info= head->key_info + cpk_quick->index; + key_names->append(','); + key_names->append(key_info->name); + length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf; + used_lengths->append(','); + used_lengths->append(buf, length); + } +} + +void QUICK_ROR_UNION_SELECT::fill_keys_and_lengths(String *key_names, + String *used_lengths) +{ + bool first= true; + QUICK_SELECT_I *quick; + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + while ((quick= it++)) + { + if (first) + first= false; + else + { + used_lengths->append(','); + key_names->append(','); + } + quick->fill_keys_and_lengths(key_names, used_lengths); + } +} + +#ifndef DBUG_OFF + +static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, + const char *msg) +{ + SEL_ARG **key,**end; + int idx; + char buff[1024]; + DBUG_ENTER("print_sel_tree"); + if (! _db_on_) + DBUG_VOID_RETURN; + + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for (idx= 0,key=tree->keys, end=key+param->keys ; + key != end ; + key++,idx++) + { + if (tree_map->is_set(idx)) + { + uint keynr= param->real_keynr[idx]; + if (tmp.length()) + tmp.append(','); + tmp.append(param->table->key_info[keynr].name); + } + } + if (!tmp.length()) + tmp.append("(empty)"); + + DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%s", tree, msg, tmp.ptr())); + + DBUG_VOID_RETURN; +} + +static void print_ror_scans_arr(TABLE *table, const char *msg, + struct st_ror_scan_info **start, + struct st_ror_scan_info **end) +{ + DBUG_ENTER("print_ror_scans"); + if (! _db_on_) + DBUG_VOID_RETURN; + + char buff[1024]; + String tmp(buff,sizeof(buff),&my_charset_bin); + tmp.length(0); + for(;start != end; start++) + { + if (tmp.length()) + tmp.append(','); + tmp.append(table->key_info[(*start)->keynr].name); + } + if (!tmp.length()) + tmp.append("(empty)"); + DBUG_PRINT("info", ("ROR key scans (%s): %s", msg, tmp.ptr())); + DBUG_VOID_RETURN; +} + /***************************************************************************** ** Print a quick range for debugging ** TODO: @@ -3890,8 +5653,6 @@ bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg, ** of locking the DEBUG stream ! *****************************************************************************/ -#ifndef DBUG_OFF - static void print_key(KEY_PART *key_part,const char *key,uint used_length) { @@ -3923,65 +5684,115 @@ print_key(KEY_PART *key_part,const char *key,uint used_length) } } -static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick, - const key_map *needed_reg) +static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg) { + char buf[MAX_KEY/8+1]; DBUG_ENTER("print_param"); if (! _db_on_ || !quick) DBUG_VOID_RETURN; + DBUG_LOCK_FILE; + + quick->dbug_dump(0, true); + fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf)); - List_iterator_fast<QUICK_RANGE_SELECT> it(quick->quick_selects); - QUICK_RANGE_SELECT* quick_range_sel; - while ((quick_range_sel= it++)) - { - print_quick_sel_range(quick_range_sel, needed_reg); - } - if (quick->pk_quick_select) - print_quick_sel_range(quick->pk_quick_select, needed_reg); - + DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; } -void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map *needed_reg) +static void print_rowid(byte* val, int len) { - QUICK_RANGE *range; - char buf[MAX_KEY/8+1]; - DBUG_ENTER("print_param"); - if (! _db_on_ || !quick) - DBUG_VOID_RETURN; - + byte *pb; DBUG_LOCK_FILE; - fprintf(DBUG_FILE,"Used quick_range on key: %d (other_keys: 0x%s):\n", - quick->index, needed_reg->print(buf)); + fputc('\"', DBUG_FILE); + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%c", *pb); + fprintf(DBUG_FILE, "\", hex: "); + + for (pb= val; pb!= val + len; ++pb) + fprintf(DBUG_FILE, "%x ", *pb); + fputc('\n', DBUG_FILE); + DBUG_UNLOCK_FILE; +} - QUICK_RANGE **pr= (QUICK_RANGE**)quick->ranges.buffer; - QUICK_RANGE **last_range= pr + quick->ranges.elements; - for (; pr!=last_range; ++pr) +void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose) +{ + fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n", + indent, "", head->key_info[index].name, max_used_key_length); + + if (verbose) { - range= *pr; - if (!(range->flag & NO_MIN_RANGE)) + QUICK_RANGE *range; + QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer; + QUICK_RANGE **last_range= pr + ranges.elements; + for (; pr!=last_range; ++pr) { - print_key(quick->key_parts,range->min_key,range->min_length); - if (range->flag & NEAR_MIN) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - } - fputs("X",DBUG_FILE); + fprintf(DBUG_FILE, "%*s", indent + 2, ""); + range= *pr; + if (!(range->flag & NO_MIN_RANGE)) + { + print_key(key_parts,range->min_key,range->min_length); + if (range->flag & NEAR_MIN) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + } + fputs("X",DBUG_FILE); - if (!(range->flag & NO_MAX_RANGE)) - { - if (range->flag & NEAR_MAX) - fputs(" < ",DBUG_FILE); - else - fputs(" <= ",DBUG_FILE); - print_key(quick->key_parts,range->max_key,range->max_length); + if (!(range->flag & NO_MAX_RANGE)) + { + if (range->flag & NEAR_MAX) + fputs(" < ",DBUG_FILE); + else + fputs(" <= ",DBUG_FILE); + print_key(key_parts,range->max_key,range->max_length); + } + fputs("\n",DBUG_FILE); } - fputs("\n",DBUG_FILE); } - DBUG_UNLOCK_FILE; - DBUG_VOID_RETURN; +} + +void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (pk_quick_select) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + pk_quick_select->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects); + QUICK_RANGE_SELECT *quick; + fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n", + indent, "", need_to_fetch_row? "":"non-"); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + if (cpk_quick) + { + fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, ""); + cpk_quick->dbug_dump(indent+2, verbose); + } + fprintf(DBUG_FILE, "%*s}\n", indent, ""); +} + +void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose) +{ + List_iterator_fast<QUICK_SELECT_I> it(quick_selects); + QUICK_SELECT_I *quick; + fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, ""); + fprintf(DBUG_FILE, "%*smerged scans {\n", indent, ""); + while ((quick= it++)) + quick->dbug_dump(indent+2, verbose); + fprintf(DBUG_FILE, "%*s}\n", indent, ""); } #endif diff --git a/sql/opt_range.h b/sql/opt_range.h index 7c2981795a2..cd9664ba759 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -79,14 +79,18 @@ public: TABLE *head; /* - the only index this quick select uses, or MAX_KEY for - QUICK_INDEX_MERGE_SELECT + index this quick select uses, or MAX_KEY for quick selects + that use several indexes */ - uint index; + uint index; + + /* applicable iff index!= MAX_KEY */ uint max_used_key_length, used_key_parts; QUICK_SELECT_I(); virtual ~QUICK_SELECT_I(){}; + + /**/ virtual int init() = 0; virtual int reset(void) = 0; virtual int get_next() = 0; /* get next record to retrieve */ @@ -97,27 +101,64 @@ public: QS_TYPE_RANGE = 0, QS_TYPE_INDEX_MERGE = 1, QS_TYPE_RANGE_DESC = 2, - QS_TYPE_FULLTEXT = 3 + QS_TYPE_FULLTEXT = 3, + QS_TYPE_ROR_INTERSECT = 4, + QS_TYPE_ROR_UNION = 5, }; - /* Get type of this quick select - one of the QS_* values */ - virtual int get_type() = 0; + /* Get type of this quick select - one of the QS_TYPE_* values */ + virtual int get_type() = 0; + + /* + Initialize this quick select as a child of a index union or intersection + scan. This call replaces init() call. + */ + virtual int init_ror_child_scan(bool reuse_handler) + { DBUG_ASSERT(0); return 1; } + + virtual void cleanup_ror_child_scan() { DBUG_ASSERT(0); } + virtual void save_last_pos(){}; + + /* + Fill key_names with list of keys this quick select used; + fill used_lenghth with correponding used lengths. + This is used by select_describe. + */ + virtual void fill_keys_and_lengths(String *key_names, + String *used_lengths)=0; + + virtual bool check_if_keys_used(List<Item> *fields); + + /* + rowid of last row retrieved by this quick select. This is used only + when doing ROR-index_merge selects + */ + byte *last_rowid; + byte *record; +#ifndef DBUG_OFF + /* + Print quick select information to DBUG_FILE. Caller is responsible + for locking DBUG_FILE before this call and unlocking it afterwards. + */ + virtual void dbug_dump(int indent, bool verbose)= 0; +#endif }; + struct st_qsel_param; class SEL_ARG; -class QUICK_RANGE_SELECT : public QUICK_SELECT_I +class QUICK_RANGE_SELECT : public QUICK_SELECT_I { protected: bool next,dont_free; public: int error; +protected: handler *file; - byte *record; + bool free_file; /* if true, this quick select "owns" file and will free it */ + protected: - friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick, - const key_map* needed_reg); friend QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, struct st_table_ref *ref); @@ -131,17 +172,19 @@ protected: MEM_ROOT *alloc); friend class QUICK_SELECT_DESC; friend class QUICK_INDEX_MERGE_SELECT; + friend class QUICK_ROR_INTERSECT_SELECT; DYNAMIC_ARRAY ranges; /* ordered array of range ptrs */ QUICK_RANGE **cur_range; /* current element in ranges */ QUICK_RANGE *range; - MEM_ROOT alloc; KEY_PART *key_parts; int cmp_next(QUICK_RANGE *range); int cmp_prev(QUICK_RANGE *range); bool row_in_ranges(); public: + MEM_ROOT alloc; + QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0, MEM_ROOT *parent_alloc=NULL); ~QUICK_RANGE_SELECT(); @@ -157,7 +200,16 @@ public: int get_next(); bool reverse_sorted() { return 0; } bool unique_key_range(); + int init_ror_child_scan(bool reuse_handler); + void save_last_pos() + { + file->position(record); + }; int get_type() { return QS_TYPE_RANGE; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif }; @@ -232,6 +284,11 @@ public: bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_INDEX_MERGE; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); @@ -242,9 +299,6 @@ public: List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it; QUICK_RANGE_SELECT* cur_quick_select; - /* last element in quick_selects list */ - QUICK_RANGE_SELECT* last_quick_select; - /* quick select that uses clustered primary key (NULL if none) */ QUICK_RANGE_SELECT* pk_quick_select; @@ -262,6 +316,87 @@ public: READ_RECORD read_record; }; + +/* + Rowid-Ordered Retrieval (ROR) index intersection quick select. + This quick select produces an intersection of records returned by several + QUICK_RANGE_SELECTs that return data ordered by rowid. +*/ +class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_INTERSECT_SELECT(THD *thd, TABLE *table, + bool retrieve_full_rows, + MEM_ROOT *parent_alloc); + ~QUICK_ROR_INTERSECT_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_INTERSECT; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif + int init_ror_child_scan(bool reuse_handler); + bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); + + /* range quick selects this intersection consists of */ + List<QUICK_RANGE_SELECT> quick_selects; + + QUICK_RANGE_SELECT *cpk_quick; + MEM_ROOT alloc; + THD *thd; + bool reset_called; + bool need_to_fetch_row; +}; + +/* + Rowid-Ordered Retrieval index union select. + +*/ +class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I +{ +public: + QUICK_ROR_UNION_SELECT(THD *thd, TABLE *table); + ~QUICK_ROR_UNION_SELECT(); + + int init(); + int reset(void); + int get_next(); + bool reverse_sorted() { return false; } + bool unique_key_range() { return false; } + int get_type() { return QS_TYPE_ROR_UNION; } + void fill_keys_and_lengths(String *key_names, String *used_lengths); + bool check_if_keys_used(List<Item> *fields); +#ifndef DBUG_OFF + virtual void dbug_dump(int indent, bool verbose); +#endif + + bool push_quick_back(QUICK_SELECT_I *quick_sel_range); + + /* range quick selects this index_merge read consists of */ + List<QUICK_SELECT_I> quick_selects; + + QUEUE queue; + MEM_ROOT alloc; + + THD *thd; + byte *cur_rowid; + byte *prev_rowid; + uint rowid_length; + bool reset_called; + bool have_prev_rowid; + +private: + static int queue_cmp(void *arg, byte *val1, byte *val2); +}; + + + class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT { public: diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 22cb11eed41..72a39e347b1 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -151,6 +151,12 @@ int mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, SQL_LIST *order, select= 0; } + if (select && select->quick && select->quick->reset()) + { + delete select; + free_underlaid_joins(thd, &thd->lex->select_lex); + DBUG_RETURN(-1); // This will force out message + } init_read_record(&info,thd,table,select,1,1); deleted=0L; init_ftfuncs(thd, &thd->lex->select_lex, 1); @@ -250,10 +256,10 @@ cleanup: #define MEM_STRIP_BUF_SIZE current_thd->variables.sortbuff_size -extern "C" int refposcmp2(void* arg, const void *a,const void *b) +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b) { - /* arg is a pointer to file->ref_length */ - return memcmp(a,b, *(int*) arg); + handler *file= (handler*)arg; + return file->cmp_ref((const byte*)a, (const byte*)b); } multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt, @@ -317,8 +323,8 @@ multi_delete::initialize_tables(JOIN *join) for (walk=walk->next ; walk ; walk=walk->next) { TABLE *table=walk->table; - *tempfiles_ptr++= new Unique (refposcmp2, - (void *) &table->file->ref_length, + *tempfiles_ptr++= new Unique (refpos_order_cmp, + (void *) table->file, table->file->ref_length, MEM_STRIP_BUF_SIZE); } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 9f26d7458d0..528fe38ed3f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -6976,8 +6976,16 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, } else if (select && select->quick) // Range found by opt_range { - /* assume results are not ordered when index merge is used */ - if (select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + int quick_type= select->quick->get_type(); + /* + assume results are not ordered when index merge is used + TODO: sergeyp: Results of all index merge selects actually are ordered + by clustered PK values. + */ + + if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) DBUG_RETURN(0); ref_key= select->quick->index; ref_key_parts= select->quick->used_key_parts; @@ -7038,9 +7046,11 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, */ if (!select->quick->reverse_sorted()) { + int quick_type= select->quick->get_type(); if (table->file->index_flags(ref_key) & HA_NOT_READ_PREFIX_LAST || - (select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)) + quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || + quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION) DBUG_RETURN(0); // Use filesort // ORDER BY range_key DESC @@ -8961,6 +8971,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, select_result *result=join->result; Item *item_null= new Item_null(); CHARSET_INFO *cs= &my_charset_latin1; + int quick_type= -1; DBUG_ENTER("select_describe"); DBUG_PRINT("info", ("Select 0x%lx, type %s, message %s", (ulong)join->select_lex, join->select_lex->type, @@ -9006,8 +9017,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, cs)); if (tab->type == JT_ALL && tab->select && tab->select->quick) { - if (tab->select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) + quick_type= tab->select->quick->get_type(); + if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || + (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) tab->type = JT_INDEX_MERGE; else tab->type = JT_RANGE; @@ -9028,6 +9041,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, strlen(join_type_str[tab->type]), cs)); uint j; + /* Build "possible_keys" value and add it to item_list */ if (!tab->keys.is_clear_all()) { for (j=0 ; j < table->keys ; j++) @@ -9044,6 +9058,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, item_list.push_back(new Item_string(tmp1.ptr(),tmp1.length(),cs)); else item_list.push_back(item_null); + + /* Build key,key_len, and ref values and add them to item_list */ if (tab->ref.key_parts) { KEY *key_info=table->key_info+ tab->ref.key; @@ -9078,48 +9094,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } else if (tab->select && tab->select->quick) { - if (tab->select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - QUICK_INDEX_MERGE_SELECT *quick_imerge= - (QUICK_INDEX_MERGE_SELECT*)tab->select->quick; - QUICK_RANGE_SELECT *quick; - - List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge-> - quick_selects); - while ((quick= it++)) - { - KEY *key_info= table->key_info + quick->index; - register uint length; - if (tmp3.length()) - tmp3.append(','); - - tmp3.append(key_info->name); - - if (tmp2.length()) - tmp2.append(','); - - length= longlong2str(quick->max_used_key_length, keylen_str_buf, - 10) - - keylen_str_buf; - - tmp2.append(keylen_str_buf, length); - } - } - else - { - KEY *key_info= table->key_info + tab->select->quick->index; - register uint length; - tmp3.append(key_info->name); - - length= longlong2str(tab->select->quick->max_used_key_length, - keylen_str_buf, 10) - - keylen_str_buf; - tmp2.append(keylen_str_buf, length); - } - - item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); + tab->select->quick->fill_keys_and_lengths(&tmp2, &tmp3); item_list.push_back(new Item_string(tmp2.ptr(),tmp2.length(),cs)); + item_list.push_back(new Item_string(tmp3.ptr(),tmp3.length(),cs)); item_list.push_back(item_null); } else @@ -9134,7 +9111,10 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, my_bool key_read=table->key_read; if (tab->type == JT_NEXT && table->used_keys.is_set(tab->index)) key_read=1; - + if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT && + !((QUICK_ROR_INTERSECT_SELECT*)tab->select->quick)->need_to_fetch_row) + key_read=1; + if (tab->info) item_list.push_back(new Item_string(tab->info,strlen(tab->info),cs)); else diff --git a/sql/sql_select.h b/sql/sql_select.h index 9bba4e9318e..9d784e585b1 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -329,7 +329,7 @@ uint find_shortest_key(TABLE *table, const key_map *usable_keys); int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds); /* from sql_delete.cc, used by opt_range.cc */ -extern "C" int refposcmp2(void* arg, const void *a,const void *b); +extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); /* class to copying an field/item to a key struct */ diff --git a/sql/sql_test.cc b/sql/sql_test.cc index cd493fcac30..9e8b6f551c6 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -182,37 +182,8 @@ TEST_join(JOIN *join) tab->select->quick_keys.print(buf)); else if (tab->select->quick) { - int quick_type= tab->select->quick->get_type(); - if ((quick_type == QUICK_SELECT_I::QS_TYPE_RANGE) || - (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE_DESC)) - { - fprintf(DBUG_FILE, - " quick select used on key %s, length: %d\n", - form->key_info[tab->select->quick->index].name, - tab->select->quick->max_used_key_length); - } - else if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - QUICK_INDEX_MERGE_SELECT *quick_imerge= - (QUICK_INDEX_MERGE_SELECT*)tab->select->quick; - QUICK_RANGE_SELECT *quick; - fprintf(DBUG_FILE, - " index_merge quick select used\n"); - - List_iterator_fast<QUICK_RANGE_SELECT> it(quick_imerge->quick_selects); - while ((quick = it++)) - { - fprintf(DBUG_FILE, - " range quick select: key %s, length: %d\n", - form->key_info[quick->index].name, - quick->max_used_key_length); - } - } - else - { - fprintf(DBUG_FILE, - " quick select of unknown nature used\n"); - } + fprintf(DBUG_FILE, " quick select used:\n"); + tab->select->quick->dbug_dump(18, false); } else VOID(fputs(" select used\n",DBUG_FILE)); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 6b4d2f9b659..15d86b63aa1 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -176,18 +176,11 @@ int mysql_update(THD *thd, } init_ftfuncs(thd, &thd->lex->select_lex, 1); /* Check if we are modifying a key that we are used to search with */ + if (select && select->quick) { - if (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - used_index= select->quick->index; - used_key_is_modified= (!select->quick->unique_key_range() && - check_if_key_used(table,used_index,fields)); - } - else - { - used_key_is_modified= true; - } + used_key_is_modified= (!select->quick->unique_key_range() && + select->quick->check_if_keys_used(&fields)); } else if ((used_index=table->file->key_used_on_scan) < MAX_KEY) used_key_is_modified=check_if_key_used(table, used_index, fields); @@ -246,7 +239,9 @@ int mysql_update(THD *thd, if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, MYF(MY_WME))) goto err; - + + if (select && select->quick && select->quick->reset()) + goto err; init_read_record(&info,thd,table,select,0,1); thd->proc_info="Searching rows for update"; uint tmp_limit= limit; @@ -300,6 +295,9 @@ int mysql_update(THD *thd, if (handle_duplicates == DUP_IGNORE) table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + + if (select && select->quick && select->quick->reset()) + goto err; init_read_record(&info,thd,table,select,0,1); updated= found= 0; @@ -732,26 +730,7 @@ static bool safe_update_on_fly(JOIN_TAB *join_tab, List<Item> *fields) case JT_ALL: /* If range search on index */ if (join_tab->quick) - { - if (join_tab->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) - { - return !check_if_key_used(table,join_tab->quick->index,*fields); - } - else - { - QUICK_INDEX_MERGE_SELECT *qsel_imerge= - (QUICK_INDEX_MERGE_SELECT*)(join_tab->quick); - List_iterator_fast<QUICK_RANGE_SELECT> it(qsel_imerge->quick_selects); - QUICK_RANGE_SELECT *quick; - while ((quick= it++)) - { - if (check_if_key_used(table, quick->index, *fields)) - return 0; - } - return 1; - } - } - + return !join_tab->quick->check_if_keys_used(fields); /* If scanning in clustered key */ if ((table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && table->primary_key < MAX_KEY) |