diff options
author | unknown <sergefp@mysql.com> | 2004-11-16 21:36:31 +0300 |
---|---|---|
committer | unknown <sergefp@mysql.com> | 2004-11-16 21:36:31 +0300 |
commit | f2db7d57f5a3735d36a6c53c27f5ad5dc77c5603 (patch) | |
tree | e0894041b167c4643344aaa4a8bf1281db3ac3e4 | |
parent | c9a09e12db31c530ffa763e9657dd502db26c5d7 (diff) | |
parent | 612c83b845822e13a6653af3985c4e3395cb8259 (diff) | |
download | mariadb-git-f2db7d57f5a3735d36a6c53c27f5ad5dc77c5603.tar.gz |
Merge spetrunia@bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/dbdata/psergey/mysql-4.1-heap-bug
heap/hp_hash.c:
Auto merged
mysql-test/r/heap.result:
Auto merged
sql/ha_heap.cc:
Auto merged
-rw-r--r-- | heap/_check.c | 23 | ||||
-rw-r--r-- | heap/hp_block.c | 51 | ||||
-rw-r--r-- | heap/hp_clear.c | 1 | ||||
-rw-r--r-- | heap/hp_create.c | 1 | ||||
-rw-r--r-- | heap/hp_delete.c | 12 | ||||
-rw-r--r-- | heap/hp_hash.c | 20 | ||||
-rw-r--r-- | heap/hp_write.c | 98 | ||||
-rw-r--r-- | include/heap.h | 51 | ||||
-rw-r--r-- | mysql-test/r/heap.result | 8 | ||||
-rw-r--r-- | mysql-test/r/heap_hash.result | 160 | ||||
-rw-r--r-- | mysql-test/r/myisam.result | 2 | ||||
-rw-r--r-- | mysql-test/t/heap_hash.test | 112 | ||||
-rw-r--r-- | sql/ha_heap.cc | 55 | ||||
-rw-r--r-- | sql/ha_heap.h | 10 | ||||
-rw-r--r-- | sql/structs.h | 7 |
15 files changed, 558 insertions, 53 deletions
diff --git a/heap/_check.c b/heap/_check.c index 233cb8cb0c5..a745aee48bf 100644 --- a/heap/_check.c +++ b/heap/_check.c @@ -102,9 +102,11 @@ static int check_one_key(HP_KEYDEF *keydef, uint keynr, ulong records, int error; uint i,found,max_links,seek,links; uint rec_link; /* Only used with debugging */ + uint hash_buckets_found; HASH_INFO *hash_info; error=0; + hash_buckets_found= 0; for (i=found=max_links=seek=0 ; i < records ; i++) { hash_info=hp_find_hash(&keydef->block,i); @@ -128,21 +130,32 @@ static int check_one_key(HP_KEYDEF *keydef, uint keynr, ulong records, found++; } if (links > max_links) max_links=links; + hash_buckets_found++; } } if (found != records) { - DBUG_PRINT("error",("Found %ld of %ld records")); + DBUG_PRINT("error",("Found %ld of %ld records", found, records)); + error=1; + } + if (keydef->hash_buckets != hash_buckets_found) + { + DBUG_PRINT("error",("Found %ld buckets, stats shows %ld buckets", + hash_buckets_found, keydef->hash_buckets)); error=1; } DBUG_PRINT("info", - ("records: %ld seeks: %d max links: %d hitrate: %.2f", + ("records: %ld seeks: %d max links: %d hitrate: %.2f " + "buckets: %d", records,seek,max_links, - (float) seek / (float) (records ? records : 1))); + (float) seek / (float) (records ? records : 1), + hash_buckets_found)); if (print_status) - printf("Key: %d records: %ld seeks: %d max links: %d hitrate: %.2f\n", + printf("Key: %d records: %ld seeks: %d max links: %d " + "hitrate: %.2f buckets: %d\n", keynr, records, seek, max_links, - (float) seek / (float) (records ? records : 1)); + (float) seek / (float) (records ? records : 1), + hash_buckets_found); return error; } diff --git a/heap/hp_block.c b/heap/hp_block.c index 6a022fb3084..f26b208b521 100644 --- a/heap/hp_block.c +++ b/heap/hp_block.c @@ -18,12 +18,19 @@ #include "heapdef.h" - /* Find record according to record-position */ +/* + Find record according to record-position. + + The record is located by factoring position number pos into (p_0, p_1, ...) + such that + pos = SUM_i(block->level_info[i].records_under_level * p_i) + {p_0, p_1, ...} serve as indexes to descend the blocks tree. +*/ byte *hp_find_block(HP_BLOCK *block, ulong pos) { reg1 int i; - reg3 HP_PTRS *ptr; + reg3 HP_PTRS *ptr; /* block base ptr */ for (i=block->levels-1, ptr=block->root ; i > 0 ; i--) { @@ -34,8 +41,18 @@ byte *hp_find_block(HP_BLOCK *block, ulong pos) } - /* get one new block-of-records. Alloc ptr to block if neaded */ - /* Interrupts are stopped to allow ha_panic in interrupts */ +/* + Get one new block-of-records. Alloc ptr to block if needed + SYNOPSIS + hp_get_new_block() + block HP_BLOCK tree-like block + alloc_length OUT Amount of memory allocated from the heap + + Interrupts are stopped to allow ha_panic in interrupts + RETURN + 0 OK + 1 Out of memory +*/ int hp_get_new_block(HP_BLOCK *block, ulong *alloc_length) { @@ -46,6 +63,18 @@ int hp_get_new_block(HP_BLOCK *block, ulong *alloc_length) if (block->level_info[i].free_ptrs_in_block) break; + /* + Allocate space for leaf block plus space for upper level blocks up to + first level that has a free slot to put the pointer. + In some cases we actually allocate more then we need: + Consider e.g. a situation where we have one level 1 block and one level 0 + block, the level 0 block is full and this function is called. We only + need a leaf block in this case. Nevertheless, we will get here with i=1 + and will also allocate sizeof(HP_PTRS) for non-leaf block and will never + use this space. + This doesn't add much overhead - with current values of sizeof(HP_PTRS) + and my_default_record_cache_size we get about 1/128 unused memory. + */ *alloc_length=sizeof(HP_PTRS)*i+block->records_in_block* block->recbuffer; if (!(root=(HP_PTRS*) my_malloc(*alloc_length,MYF(0)))) return 1; @@ -60,21 +89,33 @@ int hp_get_new_block(HP_BLOCK *block, ulong *alloc_length) dont_break(); /* Dont allow SIGHUP or SIGINT */ if ((uint) i == block->levels) { + /* Adding a new level on top of the existing ones. */ block->levels=i+1; + /* + Use first allocated HP_PTRS as a top-level block. Put the current + block tree into the first slot of a new top-level block. + */ block->level_info[i].free_ptrs_in_block=HP_PTRS_IN_NOD-1; ((HP_PTRS**) root)[0]= block->root; block->root=block->level_info[i].last_blocks= root++; } + /* Occupy the free slot we've found at level i */ block->level_info[i].last_blocks-> blocks[HP_PTRS_IN_NOD - block->level_info[i].free_ptrs_in_block--]= (byte*) root; - + + /* Add a block subtree with each node having one left-most child */ for (j=i-1 ; j >0 ; j--) { block->level_info[j].last_blocks= root++; block->level_info[j].last_blocks->blocks[0]=(byte*) root; block->level_info[j].free_ptrs_in_block=HP_PTRS_IN_NOD-1; } + + /* + root now points to last (block->records_in_block* block->recbuffer) + allocated bytes. Use it as a leaf block. + */ block->level_info[0].last_blocks= root; allow_break(); /* Allow SIGHUP & SIGINT */ } diff --git a/heap/hp_clear.c b/heap/hp_clear.c index 4440344f990..596d71ebe9c 100644 --- a/heap/hp_clear.c +++ b/heap/hp_clear.c @@ -97,6 +97,7 @@ void hp_clear_keys(HP_SHARE *info) VOID(hp_free_level(block,block->levels,block->root,(byte*) 0)); block->levels=0; block->last_allocated=0; + keyinfo->hash_buckets= 0; } } info->index_length=0; diff --git a/heap/hp_create.c b/heap/hp_create.c index 02725576c8f..d1783118c0d 100644 --- a/heap/hp_create.c +++ b/heap/hp_create.c @@ -128,6 +128,7 @@ int heap_create(const char *name, uint keys, HP_KEYDEF *keydef, max_records); keyinfo->delete_key= hp_delete_key; keyinfo->write_key= hp_write_key; + keyinfo->hash_buckets= 0; } } share->min_records= min_records; diff --git a/heap/hp_delete.c b/heap/hp_delete.c index c918cf37f05..9cf8b8936b6 100644 --- a/heap/hp_delete.c +++ b/heap/hp_delete.c @@ -97,8 +97,8 @@ int hp_rb_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, flag Is set if we want's to correct info->current_ptr RETURN - 0 ok - # error number + 0 Ok + other Error code */ int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, @@ -151,6 +151,8 @@ int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, pos->ptr_to_rec=empty->ptr_to_rec; pos->next_key=empty->next_key; } + else + keyinfo->hash_buckets--; if (empty == lastpos) /* deleted last hash key */ DBUG_RETURN (0); @@ -187,7 +189,11 @@ int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, } pos3= pos; /* Link pos->next after lastpos */ } - else pos3= 0; /* Different positions merge */ + else + { + pos3= 0; /* Different positions merge */ + keyinfo->hash_buckets--; + } empty[0]=lastpos[0]; hp_movelink(pos3, empty, pos->next_key); diff --git a/heap/hp_hash.c b/heap/hp_hash.c index b54e76d79fe..ee5b4958e62 100644 --- a/heap/hp_hash.c +++ b/heap/hp_hash.c @@ -196,7 +196,18 @@ byte *hp_search_next(HP_INFO *info, HP_KEYDEF *keyinfo, const byte *key, } - /* Calculate pos according to keys */ +/* + Calculate position number for hash value. + SYNOPSIS + hp_mask() + hashnr Hash value + buffmax Value such that + 2^(n-1) < maxlength <= 2^n = buffmax + maxlength + + RETURN + Array index, in [0..maxlength) +*/ ulong hp_mask(ulong hashnr, ulong buffmax, ulong maxlength) { @@ -205,7 +216,12 @@ ulong hp_mask(ulong hashnr, ulong buffmax, ulong maxlength) } - /* Change link from pos to new_link */ +/* + Change + next_link -> ... -> X -> pos + to + next_link -> ... -> X -> newlink +*/ void hp_movelink(HASH_INFO *pos, HASH_INFO *next_link, HASH_INFO *newlink) { diff --git a/heap/hp_write.c b/heap/hp_write.c index 3b0ec76d616..43cee67b39c 100644 --- a/heap/hp_write.c +++ b/heap/hp_write.c @@ -36,7 +36,6 @@ int heap_write(HP_INFO *info, const byte *record) byte *pos; HP_SHARE *share=info->s; DBUG_ENTER("heap_write"); - #ifndef DBUG_OFF if (info->mode & O_RDONLY) { @@ -160,7 +159,31 @@ static byte *next_free_record_pos(HP_SHARE *info) block_pos*info->block.recbuffer); } - /* Write a hash-key to the hash-index */ + +/* + Write a hash-key to the hash-index + SYNOPSIS + info Heap table info + keyinfo Key info + record Table record to added + recpos Memory buffer where the table record will be stored if added + successfully + NOTE + Hash index uses HP_BLOCK structure as a 'growable array' of HASH_INFO + structs. Array size == number of entries in hash index. + hp_mask(hp_rec_hashnr()) maps hash entries values to hash array positions. + If there are several hash entries with the same hash array position P, + they are connected in a linked list via HASH_INFO::next_key. The first + list element is located at position P, next elements are located at + positions for which there is no record that should be located at that + position. The order of elements in the list is arbitrary. + + RETURN + 0 - OK + -1 - Out of memory + HA_ERR_FOUND_DUPP_KEY - Duplicate record on unique key. The record was + still added and the caller must call hp_delete_key for it. +*/ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, const byte *record, byte *recpos) @@ -180,19 +203,54 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, DBUG_RETURN(-1); /* No more memory */ halfbuff= (long) share->blength >> 1; pos= hp_find_hash(&keyinfo->block,(first_index=share->records-halfbuff)); - + + /* + We're about to add one more hash array position, with hash_mask=#records. + The number of hash positions will change and some entries might need to + be relocated to the newly added position. Those entries are currently + members of the list that starts at #first_index position (this is + guaranteed by properties of hp_mask(hp_rec_hashnr(X)) mapping function) + At #first_index position currently there may be either: + a) An entry with hashnr != first_index. We don't need to move it. + or + b) A list of items with hash_mask=first_index. The list contains entries + of 2 types: + 1) entries that should be relocated to the list that starts at new + position we're adding ('uppper' list) + 2) entries that should be left in the list starting at #first_index + position ('lower' list) + */ if (pos != empty) /* If some records */ { do { hashnr = hp_rec_hashnr(keyinfo, pos->ptr_to_rec); - if (flag == 0) /* First loop; Check if ok */ + if (flag == 0) + { + /* + First loop, bail out if we're dealing with case a) from above + comment + */ if (hp_mask(hashnr, share->blength, share->records) != first_index) break; + } + /* + flag & LOWFIND - found a record that should be put into lower position + flag & LOWUSED - lower position occupied by the record + Same for HIGHFIND and HIGHUSED and 'upper' position + + gpos - ptr to last element in lower position's list + gpos2 - ptr to last element in upper position's list + + ptr_to_rec - ptr to last entry that should go into lower list. + ptr_to_rec2 - same for upper list. + */ if (!(hashnr & halfbuff)) - { /* Key will not move */ + { + /* Key should be put into 'lower' list */ if (!(flag & LOWFIND)) { + /* key is the first element to go into lower position */ if (flag & HIGHFIND) { flag=LOWFIND | HIGHFIND; @@ -203,16 +261,21 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, } else { - flag=LOWFIND | LOWUSED; /* key isn't changed */ + /* + We can only get here at first iteration: key is at 'lower' + position pos and should be left here. + */ + flag=LOWFIND | LOWUSED; gpos=pos; ptr_to_rec=pos->ptr_to_rec; } } else - { + { + /* Already have another key for lower position */ if (!(flag & LOWUSED)) { - /* Change link of previous LOW-key */ + /* Change link of previous lower-list key */ gpos->ptr_to_rec=ptr_to_rec; gpos->next_key=pos; flag= (flag & HIGHFIND) | (LOWFIND | LOWUSED); @@ -222,19 +285,21 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, } } else - { /* key will be moved */ + { + /* key will be put into 'higher' list */ if (!(flag & HIGHFIND)) { flag= (flag & LOWFIND) | HIGHFIND; /* key shall be moved to the last (empty) position */ - gpos2 = empty; empty=pos; + gpos2= empty; + empty= pos; ptr_to_rec2=pos->ptr_to_rec; } else { if (!(flag & HIGHUSED)) { - /* Change link of previous hash-key and save */ + /* Change link of previous upper-list key and save */ gpos2->ptr_to_rec=ptr_to_rec2; gpos2->next_key=pos; flag= (flag & LOWFIND) | (HIGHFIND | HIGHUSED); @@ -245,6 +310,15 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, } } while ((pos=pos->next_key)); + + if ((flag & (LOWFIND | HIGHFIND)) == (LOWFIND | HIGHFIND)) + { + /* + If both 'higher' and 'lower' list have at least one element, now + there are two hash buckets instead of one. + */ + keyinfo->hash_buckets++; + } if ((flag & (LOWFIND | LOWUSED)) == LOWFIND) { @@ -265,6 +339,7 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, { pos->ptr_to_rec=recpos; pos->next_key=0; + keyinfo->hash_buckets++; } else { @@ -280,6 +355,7 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, } else { + keyinfo->hash_buckets++; pos->ptr_to_rec=recpos; pos->next_key=0; hp_movelink(pos, gpos, empty); diff --git a/include/heap.h b/include/heap.h index 63f2abbabc7..5e83a6e2cb5 100644 --- a/include/heap.h +++ b/include/heap.h @@ -63,18 +63,48 @@ typedef struct st_heap_ptrs struct st_level_info { - uint free_ptrs_in_block,records_under_level; - HP_PTRS *last_blocks; /* pointers to HP_PTRS or records */ + /* Number of unused slots in *last_blocks HP_PTRS block (0 for 0th level) */ + uint free_ptrs_in_block; + + /* + Maximum number of records that can be 'contained' inside of each element + of last_blocks array. For level 0 - 1, for level 1 - HP_PTRS_IN_NOD, for + level 2 - HP_PTRS_IN_NOD^2 and so forth. + */ + uint records_under_level; + + /* + Ptr to last allocated HP_PTRS (or records buffer for level 0) on this + level. + */ + HP_PTRS *last_blocks; }; -typedef struct st_heap_block /* The data is saved in blocks */ + +/* + Heap table records and hash index entries are stored in HP_BLOCKs. + HP_BLOCK is used as a 'growable array' of fixed-size records. Size of record + is recbuffer bytes. + The internal representation is as follows: + HP_BLOCK is a hierarchical structure of 'blocks'. + A block at level 0 is an array records_in_block records. + A block at higher level is an HP_PTRS structure with pointers to blocks at + lower levels. + At the highest level there is one top block. It is stored in HP_BLOCK::root. + + See hp_find_block for a description of how record pointer is obtained from + its index. + See hp_get_new_block +*/ + +typedef struct st_heap_block { - HP_PTRS *root; + HP_PTRS *root; /* Top-level block */ struct st_level_info level_info[HP_MAX_LEVELS+1]; - uint levels; - uint records_in_block; /* Records in a heap-block */ + uint levels; /* number of used levels */ + uint records_in_block; /* Records in one heap-block */ uint recbuffer; /* Length of one saved record */ - ulong last_allocated; /* Blocks allocated, used by keys */ + ulong last_allocated; /* number of records there is allocated space for */ } HP_BLOCK; struct st_heap_info; /* For referense */ @@ -87,6 +117,11 @@ typedef struct st_hp_keydef /* Key definition with open */ uint8 algorithm; /* HASH / BTREE */ HA_KEYSEG *seg; HP_BLOCK block; /* Where keys are saved */ + /* + Number of buckets used in hash table. Used only to provide + #records estimates for heap key scans. + */ + ha_rows hash_buckets; TREE rb_tree; int (*write_key)(struct st_heap_info *info, struct st_hp_keydef *keyinfo, const byte *record, byte *recpos); @@ -102,7 +137,7 @@ typedef struct st_heap_share ulong min_records,max_records; /* Params to open */ ulong data_length,index_length; uint records; /* records */ - uint blength; + uint blength; /* records rounded up to 2^n */ uint deleted; /* Deleted records in database */ uint reclength; /* Length of one record */ uint changed; diff --git a/mysql-test/r/heap.result b/mysql-test/r/heap.result index 92b694b5117..4950799137a 100644 --- a/mysql-test/r/heap.result +++ b/mysql-test/r/heap.result @@ -4,7 +4,7 @@ insert into t1 values(1,1),(2,2),(3,3),(4,4); delete from t1 where a=1 or a=0; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 0 PRIMARY 1 a NULL NULL NULL NULL HASH +t1 0 PRIMARY 1 a NULL 3 NULL NULL HASH select * from t1; a b 2 2 @@ -169,7 +169,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL btn NULL NULL NULL 11 Using where explain select * from t1 where btn="a" and new_col="a"; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref btn btn 11 const,const 10 Using where +1 SIMPLE t1 ref btn btn 11 const,const 2 Using where drop table t1; CREATE TABLE t1 ( a int default NULL, @@ -182,7 +182,7 @@ SELECT * FROM t1 WHERE a=NULL; a b explain SELECT * FROM t1 WHERE a IS NULL; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a a 5 const 10 Using where +1 SIMPLE t1 ref a a 5 const 1 Using where SELECT * FROM t1 WHERE a<=>NULL; a b NULL 99 @@ -204,7 +204,7 @@ key a (a) INSERT INTO t1 VALUES (10), (10), (10); EXPLAIN SELECT * FROM t1 WHERE a=10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a a 5 const 10 Using where +1 SIMPLE t1 ref a a 5 const 3 Using where SELECT * FROM t1 WHERE a=10; a 10 diff --git a/mysql-test/r/heap_hash.result b/mysql-test/r/heap_hash.result index 7affbf788fb..4f5de197858 100644 --- a/mysql-test/r/heap_hash.result +++ b/mysql-test/r/heap_hash.result @@ -1,10 +1,10 @@ -drop table if exists t1; +drop table if exists t1,t2; create table t1 (a int not null,b int not null, primary key using HASH (a)) engine=heap comment="testing heaps" avg_row_length=100 min_rows=1 max_rows=100; insert into t1 values(1,1),(2,2),(3,3),(4,4); delete from t1 where a=1 or a=0; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 0 PRIMARY 1 a NULL NULL NULL NULL HASH +t1 0 PRIMARY 1 a NULL 3 NULL NULL HASH select * from t1; a b 2 2 @@ -169,7 +169,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL btn NULL NULL NULL 11 Using where explain select * from t1 where btn="a" and new_col="a"; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref btn btn 11 const,const 10 Using where +1 SIMPLE t1 ref btn btn 11 const,const 2 Using where drop table t1; CREATE TABLE t1 ( a int default NULL, @@ -182,7 +182,7 @@ SELECT * FROM t1 WHERE a=NULL; a b explain SELECT * FROM t1 WHERE a IS NULL; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref a a 5 const 10 Using where +1 SIMPLE t1 ref a a 5 const 1 Using where SELECT * FROM t1 WHERE a<=>NULL; a b NULL 99 @@ -203,3 +203,155 @@ DELETE from t1 where a < 100; SELECT * from t1; a DROP TABLE t1; +create table t1 +( +a char(8) not null, +b char(20) not null, +c int not null, +key (a) +) engine=heap; +insert into t1 values ('aaaa', 'prefill-hash=5',0); +insert into t1 values ('aaab', 'prefill-hash=0',0); +insert into t1 values ('aaac', 'prefill-hash=7',0); +insert into t1 values ('aaad', 'prefill-hash=2',0); +insert into t1 values ('aaae', 'prefill-hash=1',0); +insert into t1 values ('aaaf', 'prefill-hash=4',0); +insert into t1 values ('aaag', 'prefill-hash=3',0); +insert into t1 values ('aaah', 'prefill-hash=6',0); +explain select * from t1 where a='aaaa'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaab'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaac'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaad'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +insert into t1 select * from t1; +explain select * from t1 where a='aaaa'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaab'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaac'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaad'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +flush tables; +explain select * from t1 where a='aaaa'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 2 Using where +explain select * from t1 where a='aaab'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 2 Using where +explain select * from t1 where a='aaac'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 2 Using where +explain select * from t1 where a='aaad'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 2 Using where +create table t2 as select * from t1; +delete from t1; +insert into t1 select * from t2; +explain select * from t1 where a='aaaa'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaab'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaac'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +explain select * from t1 where a='aaad'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref a a 8 const 1 Using where +drop table t1, t2; +create table t1 ( +id int unsigned not null primary key auto_increment, +name varchar(20) not null, +index heap_idx(name), +index btree_idx using btree(name) +) engine=heap; +create table t2 ( +id int unsigned not null primary key auto_increment, +name varchar(20) not null, +index btree_idx using btree(name), +index heap_idx(name) +) engine=heap; +insert into t1 (name) values ('Matt'), ('Lilu'), ('Corbin'), ('Carly'), +('Suzy'), ('Hoppy'), ('Burrito'), ('Mimi'), ('Sherry'), ('Ben'), ('Phil'), +('Emily'), ('Mike'); +insert into t2 select * from t1; +explain select * from t1 where name='matt'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref heap_idx,btree_idx heap_idx 20 const 1 Using where +explain select * from t2 where name='matt'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ref btree_idx,heap_idx btree_idx 20 const 1 Using where +explain select * from t1 where name='Lilu'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref heap_idx,btree_idx heap_idx 20 const 1 Using where +explain select * from t2 where name='Lilu'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ref btree_idx,heap_idx btree_idx 20 const 1 Using where +explain select * from t1 where name='Phil'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref heap_idx,btree_idx heap_idx 20 const 1 Using where +explain select * from t2 where name='Phil'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ref btree_idx,heap_idx btree_idx 20 const 1 Using where +explain select * from t1 where name='Lilu'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref heap_idx,btree_idx heap_idx 20 const 1 Using where +explain select * from t2 where name='Lilu'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ref btree_idx,heap_idx btree_idx 20 const 1 Using where +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +flush tables; +select count(*) from t1 where name='Matt'; +count(*) +7 +explain select * from t1 ignore index (btree_idx) where name='matt'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref heap_idx heap_idx 20 const 7 Using where +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 id NULL 91 NULL NULL HASH +t1 1 heap_idx 1 name NULL 13 NULL NULL HASH +t1 1 btree_idx 1 name A NULL NULL NULL BTREE +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 id NULL 91 NULL NULL HASH +t1 1 heap_idx 1 name NULL 13 NULL NULL HASH +t1 1 btree_idx 1 name A NULL NULL NULL BTREE +create table t3 +( +a varchar(20) not null, +b varchar(20) not null, +key (a,b) +) engine=heap; +insert into t3 select name, name from t1; +show index from t3; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t3 1 a 1 a NULL NULL NULL NULL HASH +t3 1 a 2 b NULL 15 NULL NULL HASH +show index from t3; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t3 1 a 1 a NULL NULL NULL NULL HASH +t3 1 a 2 b NULL 15 NULL NULL HASH +explain select * from t1 ignore key(btree_idx), t3 where t1.name='matt' and t3.a = concat('',t1.name) and t3.b=t1.name; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref heap_idx heap_idx 20 const 7 Using where +1 SIMPLE t3 ref a a 40 func,const 6 Using where +drop table t1, t2, t3; diff --git a/mysql-test/r/myisam.result b/mysql-test/r/myisam.result index 26dcce43d08..31b14f9b822 100644 --- a/mysql-test/r/myisam.result +++ b/mysql-test/r/myisam.result @@ -543,7 +543,7 @@ Warnings: Note 1031 Table storage engine for 't1' doesn't have this option show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 1 a 1 a NULL NULL NULL NULL YES HASH +t1 1 a 1 a NULL 1000 NULL NULL YES HASH drop table t1,t2; create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); insert into t1 values (null,''), (null,''); diff --git a/mysql-test/t/heap_hash.test b/mysql-test/t/heap_hash.test index 412b6f2e705..6d8fdec4b9e 100644 --- a/mysql-test/t/heap_hash.test +++ b/mysql-test/t/heap_hash.test @@ -3,7 +3,7 @@ # --disable_warnings -drop table if exists t1; +drop table if exists t1,t2; --enable_warnings create table t1 (a int not null,b int not null, primary key using HASH (a)) engine=heap comment="testing heaps" avg_row_length=100 min_rows=1 max_rows=100; @@ -141,3 +141,113 @@ INSERT into t1 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11); DELETE from t1 where a < 100; SELECT * from t1; DROP TABLE t1; + + +# +# Hash index # records estimate test +# +create table t1 +( + a char(8) not null, + b char(20) not null, + c int not null, + key (a) +) engine=heap; + +insert into t1 values ('aaaa', 'prefill-hash=5',0); +insert into t1 values ('aaab', 'prefill-hash=0',0); +insert into t1 values ('aaac', 'prefill-hash=7',0); +insert into t1 values ('aaad', 'prefill-hash=2',0); +insert into t1 values ('aaae', 'prefill-hash=1',0); +insert into t1 values ('aaaf', 'prefill-hash=4',0); +insert into t1 values ('aaag', 'prefill-hash=3',0); +insert into t1 values ('aaah', 'prefill-hash=6',0); + +explain select * from t1 where a='aaaa'; +explain select * from t1 where a='aaab'; +explain select * from t1 where a='aaac'; +explain select * from t1 where a='aaad'; +insert into t1 select * from t1; + +explain select * from t1 where a='aaaa'; +explain select * from t1 where a='aaab'; +explain select * from t1 where a='aaac'; +explain select * from t1 where a='aaad'; + +# a known effect: table reload causes statistics to be updated: +flush tables; +explain select * from t1 where a='aaaa'; +explain select * from t1 where a='aaab'; +explain select * from t1 where a='aaac'; +explain select * from t1 where a='aaad'; + +# Check if delete_all_rows() updates #hash_buckets +create table t2 as select * from t1; +delete from t1; +insert into t1 select * from t2; +explain select * from t1 where a='aaaa'; +explain select * from t1 where a='aaab'; +explain select * from t1 where a='aaac'; +explain select * from t1 where a='aaad'; +drop table t1, t2; + + +# Btree and hash index use costs. +create table t1 ( + id int unsigned not null primary key auto_increment, + name varchar(20) not null, + index heap_idx(name), + index btree_idx using btree(name) +) engine=heap; + +create table t2 ( + id int unsigned not null primary key auto_increment, + name varchar(20) not null, + index btree_idx using btree(name), + index heap_idx(name) +) engine=heap; + +insert into t1 (name) values ('Matt'), ('Lilu'), ('Corbin'), ('Carly'), + ('Suzy'), ('Hoppy'), ('Burrito'), ('Mimi'), ('Sherry'), ('Ben'), ('Phil'), + ('Emily'), ('Mike'); +insert into t2 select * from t1; +explain select * from t1 where name='matt'; +explain select * from t2 where name='matt'; + +explain select * from t1 where name='Lilu'; +explain select * from t2 where name='Lilu'; + +explain select * from t1 where name='Phil'; +explain select * from t2 where name='Phil'; + +explain select * from t1 where name='Lilu'; +explain select * from t2 where name='Lilu'; + +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +insert into t1 (name) select name from t2; +flush tables; +select count(*) from t1 where name='Matt'; +explain select * from t1 ignore index (btree_idx) where name='matt'; +show index from t1; + +show index from t1; + +create table t3 +( + a varchar(20) not null, + b varchar(20) not null, + key (a,b) +) engine=heap; +insert into t3 select name, name from t1; +show index from t3; +show index from t3; + +# test rec_per_key use for joins. +explain select * from t1 ignore key(btree_idx), t3 where t1.name='matt' and t3.a = concat('',t1.name) and t3.b=t1.name; + +drop table t1, t2, t3; + diff --git a/sql/ha_heap.cc b/sql/ha_heap.cc index 19b15c6fbcc..dbcf7bc9197 100644 --- a/sql/ha_heap.cc +++ b/sql/ha_heap.cc @@ -30,6 +30,18 @@ const char **ha_heap::bas_ext() const { static const char *ext[1]= { NullS }; return ext; } +/* + Hash index statistics is updated (copied from HP_KEYDEF::hash_buckets to + rec_per_key) after 1/HEAP_STATS_UPDATE_THRESHOLD fraction of table records + have been inserted/updated/deleted. delete_all_rows() and table flush cause + immediate update. + + NOTE + hash index statistics must be updated when number of table records changes + from 0 to non-zero value and vice versa. Otherwise records_in_range may + erroneously return 0 and 'range' may miss records. +*/ +#define HEAP_STATS_UPDATE_THRESHOLD 10 int ha_heap::open(const char *name, int mode, uint test_if_locked) { @@ -48,6 +60,8 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked) { /* Initialize variables for the opened table */ set_keys_for_scanning(); + if (table->tmp_table == NO_TMP_TABLE) + update_key_stats(); } return (file ? 0 : 1); } @@ -84,28 +98,58 @@ void ha_heap::set_keys_for_scanning(void) } } +void ha_heap::update_key_stats() +{ + for (uint i= 0; i < table->keys; i++) + { + KEY *key=table->key_info+i; + if (key->algorithm != HA_KEY_ALG_BTREE) + { + ha_rows hash_buckets= file->s->keydef[i].hash_buckets; + key->rec_per_key[key->key_parts-1]= + hash_buckets ? file->s->records/hash_buckets : 0; + } + } + records_changed= 0; +} + int ha_heap::write_row(byte * buf) { + int res; statistic_increment(ha_write_count,&LOCK_status); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) table->timestamp_field->set_time(); if (table->next_number_field && buf == table->record[0]) update_auto_increment(); - return heap_write(file,buf); + res= heap_write(file,buf); + if (!res && table->tmp_table == NO_TMP_TABLE && + ++records_changed*HEAP_STATS_UPDATE_THRESHOLD > file->s->records) + update_key_stats(); + return res; } int ha_heap::update_row(const byte * old_data, byte * new_data) { + int res; statistic_increment(ha_update_count,&LOCK_status); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) table->timestamp_field->set_time(); - return heap_update(file,old_data,new_data); + res= heap_update(file,old_data,new_data); + if (!res && table->tmp_table == NO_TMP_TABLE && + ++records_changed*HEAP_STATS_UPDATE_THRESHOLD > file->s->records) + update_key_stats(); + return res; } int ha_heap::delete_row(const byte * buf) { + int res; statistic_increment(ha_delete_count,&LOCK_status); - return heap_delete(file,buf); + res= heap_delete(file,buf); + if (!res && table->tmp_table == NO_TMP_TABLE && + ++records_changed*HEAP_STATS_UPDATE_THRESHOLD > file->s->records) + update_key_stats(); + return res; } int ha_heap::index_read(byte * buf, const byte * key, uint key_len, @@ -227,6 +271,8 @@ int ha_heap::extra(enum ha_extra_function operation) int ha_heap::delete_all_rows() { heap_clear(file); + if (table->tmp_table == NO_TMP_TABLE) + update_key_stats(); return 0; } @@ -384,7 +430,8 @@ ha_rows ha_heap::records_in_range(uint inx, key_range *min_key, min_key->flag != HA_READ_KEY_EXACT || max_key->flag != HA_READ_AFTER_KEY) return HA_POS_ERROR; // Can only use exact keys - return 10; // Good guess + else + return key->rec_per_key[key->key_parts-1]; } diff --git a/sql/ha_heap.h b/sql/ha_heap.h index 9ca6b9b76b6..f36e9f31c55 100644 --- a/sql/ha_heap.h +++ b/sql/ha_heap.h @@ -27,9 +27,10 @@ class ha_heap: public handler { HP_INFO *file; key_map btree_keys; - - public: - ha_heap(TABLE *table): handler(table), file(0) {} + /* number of records changed since last statistics update */ + uint records_changed; +public: + ha_heap(TABLE *table): handler(table), file(0), records_changed(0) {} ~ha_heap() {} const char *table_type() const { return "HEAP"; } const char *index_type(uint inx) @@ -91,5 +92,6 @@ class ha_heap: public handler THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type); - +private: + void update_key_stats(); }; diff --git a/sql/structs.h b/sql/structs.h index c30d85f59cb..846b3400fab 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -89,7 +89,12 @@ typedef struct st_key { enum ha_key_alg algorithm; KEY_PART_INFO *key_part; char *name; /* Name of key */ - ulong *rec_per_key; /* Key part distribution */ + /* + Array of AVG(#records with the same field value) for 1st ... Nth key part. + 0 means 'not known'. + For temporary heap tables this member is NULL. + */ + ulong *rec_per_key; union { int bdb_return_if_eq; } handler; |