diff options
author | unknown <sasha@mysql.sashanet.com> | 2001-05-24 17:06:53 -0600 |
---|---|---|
committer | unknown <sasha@mysql.sashanet.com> | 2001-05-24 17:06:53 -0600 |
commit | b7b409b76ee4c2817a882b3f08cb30890dd8532a (patch) | |
tree | 0556f9414f47140d32040504811d813034d86f7d | |
parent | a895b7b447e3b8749f9f14447f2cb72e7d062546 (diff) | |
download | mariadb-git-b7b409b76ee4c2817a882b3f08cb30890dd8532a.tar.gz |
handle tree overflow in count(distinct)
test heap table/tree overflow in count(distinct)
mysql-test/r/count_distinct2.result:
added test for tree/heap table overflow
mysql-test/t/count_distinct2.test:
test tree/heap table overflow
sql/item_sum.cc:
handle tree overflow in count(distinct)
sql/item_sum.h:
t
-rw-r--r-- | mysql-test/r/count_distinct2.result | 8 | ||||
-rw-r--r-- | mysql-test/t/count_distinct2-master.opt | 1 | ||||
-rw-r--r-- | mysql-test/t/count_distinct2.test | 29 | ||||
-rw-r--r-- | sql/item_sum.cc | 43 | ||||
-rw-r--r-- | sql/item_sum.h | 17 |
5 files changed, 94 insertions, 4 deletions
diff --git a/mysql-test/r/count_distinct2.result b/mysql-test/r/count_distinct2.result index b8330835332..3586910e8b3 100644 --- a/mysql-test/r/count_distinct2.result +++ b/mysql-test/r/count_distinct2.result @@ -74,3 +74,11 @@ count(distinct n2) n1 1 NULL 1 1 3 2 +count(distinct n) +5000 +Variable_name Value +Created_tmp_disk_tables 1 +count(distinct s) +5000 +Variable_name Value +Created_tmp_disk_tables 1 diff --git a/mysql-test/t/count_distinct2-master.opt b/mysql-test/t/count_distinct2-master.opt new file mode 100644 index 00000000000..8f1be6dce3a --- /dev/null +++ b/mysql-test/t/count_distinct2-master.opt @@ -0,0 +1 @@ +-O max_heap_table_size=16384 diff --git a/mysql-test/t/count_distinct2.test b/mysql-test/t/count_distinct2.test index 5ddd96198fe..2447a7c3611 100644 --- a/mysql-test/t/count_distinct2.test +++ b/mysql-test/t/count_distinct2.test @@ -43,3 +43,32 @@ select count(distinct n1), count(distinct n2) from t1; select count(distinct n2), n1 from t1 group by n1; drop table t1; + +# test the converstion from tree to MyISAM +create table t1 (n int); +let $1=5000; +while ($1) +{ + eval insert into t1 values($1); + dec $1; +} + +flush status; +select count(distinct n) from t1; +show status like 'Created_tmp_disk_tables'; +drop table t1; + +#test conversion from heap to MyISAM +create table t1 (s text); +let $1=5000; +while ($1) +{ + eval insert into t1 values('$1'); + dec $1; +} + +flush status; +select count(distinct s) from t1; +show status like 'Created_tmp_disk_tables'; +drop table t1; + diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 089f4f56216..6560fdedfd2 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -830,7 +830,26 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2) return 0; } +// helper function for walking the tree when we dump it to MyISAM - +// tree_walk will call it for each +// leaf +int dump_leaf(byte* key, uint32 count __attribute__((unused)), + Item_sum_count_distinct* item) +{ + char* buf = item->table->record[0]; + int error; + memset(buf, 0xff, item->rec_offset); // make up for cheating in the tree + memcpy(buf + item->rec_offset, key, item->tree.size_of_element); + if ((error = item->table->file->write_row(buf))) + { + if (error != HA_ERR_FOUND_DUPP_KEY && + error != HA_ERR_FOUND_DUPP_UNIQUE) + return 1; + } + + return 0; +} Item_sum_count_distinct::~Item_sum_count_distinct() { @@ -916,11 +935,29 @@ bool Item_sum_count_distinct::setup(THD *thd) key_len, compare_key, 0, 0); tree.cmp_arg = cmp_arg; use_tree = 1; + + // the only time key_len could be 0 is if someone does + // count(distinct) on a char(0) field - stupid thing to do, + // but this has to be handled - otherwise someone can crash + // the server with a DoS attack + max_elements_in_tree = (key_len) ? max_heap_table_size/key_len : + max_heap_table_size; } return 0; } +int Item_sum_count_distinct::tree_to_myisam() +{ + if(create_myisam_from_heap(table, tmp_table_param, + HA_ERR_RECORD_FILE_FULL, 1) || + tree_walk(&tree, (tree_walk_action)&dump_leaf, (void*)this, + left_root_right)) + return 1; + delete_tree(&tree); + use_tree = 0; + return 0; +} void Item_sum_count_distinct::reset() { @@ -947,7 +984,11 @@ bool Item_sum_count_distinct::add() if(use_tree) { - if(!tree_insert(&tree, table->record[0] + rec_offset, 0)) + // if the tree got too big, convert to MyISAM, otherwise + // insert into the tree + if((tree.elements_in_tree > max_elements_in_tree && tree_to_myisam()) + || + !tree_insert(&tree, table->record[0] + rec_offset, 0)) return 1; } else if ((error=table->file->write_row(table->record[0]))) diff --git a/sql/item_sum.h b/sql/item_sum.h index 863623c3b1a..1aa7f78d786 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -148,15 +148,26 @@ class Item_sum_count_distinct :public Item_sum_int bool fix_fields(THD *thd,TABLE_LIST *tables); TMP_TABLE_PARAM *tmp_table_param; TREE tree; - bool use_tree; // If there are no blobs, we can use a tree, which + uint max_elements_in_tree; + // calculated based on max_heap_table_size. If reached, + // walk the tree and dump it into MyISAM table + + bool use_tree; + // If there are no blobs, we can use a tree, which // is faster than heap table. In that case, we still use the table // to help get things set up, but we insert nothing in it - int rec_offset; // the first few bytes of record ( at least one) + + int rec_offset; + // the first few bytes of record ( at least one) // are just markers for deleted and NULLs. We want to skip them since // they will just bloat the tree without providing any valuable info - friend int composite_key_cmp(void* arg, byte* key1, byte* key2); + int tree_to_myisam(); + friend int composite_key_cmp(void* arg, byte* key1, byte* key2); + friend int dump_leaf(byte* key, uint32 count __attribute__((unused)), + Item_sum_count_distinct* item); + public: Item_sum_count_distinct(List<Item> &list) :Item_sum_int(list),table(0),used_table_cache(~(table_map) 0), |