From 1d56c0324e21e0e1b92e46445a8896006cb9a8e8 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 May 2001 15:07:34 -0600 Subject: use tree for count(distinct) when possible include/my_tree.h: fixed typo in comment mysys/tree.c: fixed typo in comment sql/item_sum.h: use tree for count distinct when possible --- include/my_tree.h | 2 +- mysys/tree.c | 2 +- sql/item_sum.cc | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++- sql/item_sum.h | 15 ++++++-- 4 files changed, 117 insertions(+), 5 deletions(-) diff --git a/include/my_tree.h b/include/my_tree.h index c0950409588..d4d28644a39 100644 --- a/include/my_tree.h +++ b/include/my_tree.h @@ -57,7 +57,7 @@ typedef struct st_tree { void (*free)(void *); } TREE; - /* Functions on hole tree */ + /* Functions on whole tree */ void init_tree(TREE *tree,uint default_alloc_size, int element_size, qsort_cmp2 compare, my_bool with_delete, void (*free_element)(void*)); diff --git a/mysys/tree.c b/mysys/tree.c index db2b3989b81..e46ff00adad 100644 --- a/mysys/tree.c +++ b/mysys/tree.c @@ -84,7 +84,7 @@ void init_tree(TREE *tree, uint default_alloc_size, int size, ((uint) size <= sizeof(void*) || ((uint) size & (sizeof(void*)-1)))) { tree->offset_to_key=sizeof(TREE_ELEMENT); /* Put key after element */ - /* Fix allocation size so that we don't loose any memory */ + /* Fix allocation size so that we don't lose any memory */ default_alloc_size/=(sizeof(TREE_ELEMENT)+size); if (!default_alloc_size) default_alloc_size=1; diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 375ba081f80..da0bc71ca1f 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -788,11 +788,56 @@ String *Item_std_field::val_str(String *str) #include "sql_select.h" +static int simple_raw_key_cmp(void* arg, byte* key1, byte* key2) +{ + return memcmp(key1, key2, (int)arg); +} + +static int simple_str_key_cmp(void* arg, byte* key1, byte* key2) +{ + return my_sortcmp(key1, key2, (int)arg); +} + +// did not make this one static - at least gcc gets confused when +// I try to declare a static function as a friend. If you can figure +// out the syntax to make a static function a friend, make this one +// static +int composite_key_cmp(void* arg, byte* key1, byte* key2) +{ + Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg; + Field** field = item->table->field, **field_end; + field_end = field + item->table->fields; + for(; field < field_end; ++field) + { + int res; + int len = (*field)->field_length; + switch((*field)->type()) + { + case FIELD_TYPE_STRING: + case FIELD_TYPE_VAR_STRING: + res = my_sortcmp(key1, key2, len); + break; + default: + res = memcmp(key1, key2, len); + break; + } + if(res) + return res; + key1 += len; + key2 += len; + } + return 0; +} + + + Item_sum_count_distinct::~Item_sum_count_distinct() { if (table) free_tmp_table(current_thd, table); delete tmp_table_param; + if(use_tree) + delete_tree(&tree); } @@ -821,6 +866,53 @@ bool Item_sum_count_distinct::setup(THD *thd) 0, 0, current_lex->options | thd->options))) return 1; table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows + + if(table->db_type == DB_TYPE_HEAP) // no blobs, otherwise it would be + // MyISAM + { + qsort_cmp2 compare_key; + void* cmp_arg; + int key_len; + + if(table->fields == 1) // if we have only one field, which is + // the most common use of count(distinct), it is much faster + // to use a simpler key compare method that can take advantage + // of not having to worry about other fields + { + switch(table->field[0]->type()) + { + // if we have a string, we must take care of charsets + // and case sensitivity + case FIELD_TYPE_STRING: + case FIELD_TYPE_VAR_STRING: + compare_key = (qsort_cmp2)simple_str_key_cmp; + break; + default: // since at this point we cannot have blobs + // anything else can be compared with memcmp + compare_key = (qsort_cmp2)simple_raw_key_cmp; + break; + } + cmp_arg = (void*)(key_len = table->field[0]->field_length); + rec_offset = 1; + } + else // too bad, cannot cheat - there is more than one field + { + cmp_arg = (void*)this; + compare_key = (qsort_cmp2)composite_key_cmp; + Field** field, **field_end; + field_end = (field = table->field) + table->fields; + for(key_len = 0; field < field_end; ++field) + { + key_len += (*field)->field_length; + } + rec_offset = table->reclength - key_len; + } + + init_tree(&tree, 0, key_len, compare_key, 0, 0); + tree.cmp_arg = cmp_arg; + use_tree = 1; + } + return 0; } @@ -830,6 +922,8 @@ void Item_sum_count_distinct::reset() table->file->extra(HA_EXTRA_NO_CACHE); table->file->delete_all_rows(); table->file->extra(HA_EXTRA_WRITE_CACHE); + if(use_tree) + delete_tree(&tree); (void) add(); } @@ -843,7 +937,12 @@ bool Item_sum_count_distinct::add() if ((*field)->is_real_null(0)) return 0; // Don't count NULL - if ((error=table->file->write_row(table->record[0]))) + if(use_tree) + { + if(!tree_insert(&tree, table->record[0] + rec_offset, 0)) + return 1; + } + else if ((error=table->file->write_row(table->record[0]))) { if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE) @@ -859,6 +958,8 @@ longlong Item_sum_count_distinct::val_int() { if (!table) // Empty query return LL(0); + if(use_tree) + return tree.elements_in_tree; table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); return table->file->records; } diff --git a/sql/item_sum.h b/sql/item_sum.h index 8cb09c85623..863623c3b1a 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -21,6 +21,8 @@ #pragma interface /* gcc class implementation */ #endif +#include + class Item_sum :public Item_result_field { public: @@ -145,11 +147,20 @@ class Item_sum_count_distinct :public Item_sum_int table_map used_table_cache; bool fix_fields(THD *thd,TABLE_LIST *tables); TMP_TABLE_PARAM *tmp_table_param; - + TREE tree; + bool use_tree; // If there are no blobs, we can use a tree, which + // is faster than heap table. In that case, we still use the table + // to help get things set up, but we insert nothing in it + int rec_offset; // the first few bytes of record ( at least one) + // are just markers for deleted and NULLs. We want to skip them since + // they will just bloat the tree without providing any valuable info + + friend int composite_key_cmp(void* arg, byte* key1, byte* key2); + public: Item_sum_count_distinct(List &list) :Item_sum_int(list),table(0),used_table_cache(~(table_map) 0), - tmp_table_param(0) + tmp_table_param(0),use_tree(0) { quick_group=0; } ~Item_sum_count_distinct(); table_map used_tables() const { return used_table_cache; } -- cgit v1.2.1