summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorGeorgi Kodinov <joro@sun.com>2009-09-28 10:21:25 +0300
committerGeorgi Kodinov <joro@sun.com>2009-09-28 10:21:25 +0300
commit2118bd104c4dd7c2aab0b80fa7053d8a491a90c4 (patch)
tree0600a41e4dca737e1cc98e02c2d8353e5ca4ff5b /sql
parentdb5c088b456408a1b4bda6f1298ae2ae9b202cac (diff)
downloadmariadb-git-2118bd104c4dd7c2aab0b80fa7053d8a491a90c4.tar.gz
Ported WL#3220 to mysql-next-mr.
Diffstat (limited to 'sql')
-rw-r--r--sql/field.h9
-rw-r--r--sql/item_sum.cc1199
-rw-r--r--sql/item_sum.h533
-rw-r--r--sql/opt_range.cc188
-rw-r--r--sql/opt_range.h17
-rw-r--r--sql/opt_sum.cc10
-rw-r--r--sql/sql_class.h1
-rw-r--r--sql/sql_select.cc130
-rw-r--r--sql/sql_select.h7
-rw-r--r--sql/sql_yacc.yy6
10 files changed, 1232 insertions, 868 deletions
diff --git a/sql/field.h b/sql/field.h
index a9299256f88..ffcf665d45f 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1934,9 +1934,12 @@ public:
virtual bool str_needs_quotes() { return TRUE; }
my_decimal *val_decimal(my_decimal *);
int cmp(const uchar *a, const uchar *b)
- {
- DBUG_ASSERT(ptr == a);
- return Field_bit::key_cmp(b, bytes_in_rec+test(bit_len));
+ {
+ DBUG_ASSERT(ptr == a || ptr == b);
+ if (ptr == a)
+ return Field_bit::key_cmp(b, bytes_in_rec+test(bit_len));
+ else
+ return Field_bit::key_cmp(a, bytes_in_rec+test(bit_len)) * -1;
}
int cmp_binary_offset(uint row_offset)
{ return cmp_offset(row_offset); }
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index ceb553f1c8a..c8576722c69 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -374,6 +374,7 @@ Item_sum::Item_sum(List<Item> &list) :arg_count(list.elements),
args= NULL;
}
mark_as_sum_func();
+ init_aggregator();
list.empty(); // Fields are used
}
@@ -405,6 +406,10 @@ Item_sum::Item_sum(THD *thd, Item_sum *item):
}
memcpy(args, item->args, sizeof(Item*)*arg_count);
memcpy(orig_args, item->orig_args, sizeof(Item*)*arg_count);
+ init_aggregator();
+ with_distinct= item->with_distinct;
+ if (item->aggr)
+ set_aggregator(item->aggr->Aggrtype());
}
@@ -550,13 +555,513 @@ void Item_sum::update_used_tables ()
}
-Item *Item_sum::set_arg(int i, THD *thd, Item *new_val)
+Item *Item_sum::set_arg(uint i, THD *thd, Item *new_val)
{
thd->change_item_tree(args + i, new_val);
return new_val;
}
+int Item_sum::set_aggregator(Aggregator::Aggregator_type aggregator)
+{
+ switch (aggregator)
+ {
+ case Aggregator::DISTINCT_AGGREGATOR:
+ aggr= new Aggregator_distinct(this);
+ break;
+
+ case Aggregator::SIMPLE_AGGREGATOR:
+ aggr= new Aggregator_simple(this);
+ break;
+ };
+ return aggr ? FALSE : TRUE;
+}
+
+
+void Item_sum::cleanup()
+{
+ if (aggr)
+ {
+ delete aggr;
+ aggr= NULL;
+ }
+ Item_result_field::cleanup();
+ forced_const= FALSE;
+}
+
+
+/**
+ Compare keys consisting of single field that cannot be compared as binary.
+
+ Used by the Unique class to compare keys. Will do correct comparisons
+ for all field types.
+
+ @param arg Pointer to the relevant Field class instance
+ @param key1 left key image
+ @param key2 right key image
+ @return comparison result
+ @retval < 0 if key1 < key2
+ @retval = 0 if key1 = key2
+ @retval > 0 if key1 > key2
+*/
+
+static int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2)
+{
+ Field *f= (Field*) arg;
+ return f->cmp(key1, key2);
+}
+
+
+/**
+ Correctly compare composite keys.
+
+ Used by the Unique class to compare keys. Will do correct comparisons
+ for composite keys with various field types.
+
+ @param arg Pointer to the relevant Aggregator_distinct instance
+ @param key1 left key image
+ @param key2 right key image
+ @return comparison result
+ @retval <0 if key1 < key2
+ @retval =0 if key1 = key2
+ @retval >0 if key1 > key2
+*/
+
+int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2)
+{
+ Aggregator_distinct *aggr= (Aggregator_distinct *) arg;
+ Field **field = aggr->table->field;
+ Field **field_end= field + aggr->table->s->fields;
+ uint32 *lengths=aggr->field_lengths;
+ for (; field < field_end; ++field)
+ {
+ Field* f = *field;
+ int len = *lengths++;
+ int res = f->cmp(key1, key2);
+ if (res)
+ return res;
+ key1 += len;
+ key2 += len;
+ }
+ return 0;
+}
+
+
+static enum enum_field_types
+calc_tmp_field_type(enum enum_field_types table_field_type,
+ Item_result result_type)
+{
+ /* Adjust tmp table type according to the chosen aggregation type */
+ switch (result_type) {
+ case STRING_RESULT:
+ case REAL_RESULT:
+ if (table_field_type != MYSQL_TYPE_FLOAT)
+ table_field_type= MYSQL_TYPE_DOUBLE;
+ break;
+ case INT_RESULT:
+ table_field_type= MYSQL_TYPE_LONGLONG;
+ /* fallthrough */
+ case DECIMAL_RESULT:
+ if (table_field_type != MYSQL_TYPE_LONGLONG)
+ table_field_type= MYSQL_TYPE_NEWDECIMAL;
+ break;
+ case ROW_RESULT:
+ default:
+ DBUG_ASSERT(0);
+ }
+ return table_field_type;
+}
+
+
+/***************************************************************************/
+
+C_MODE_START
+
+/* Declarations for auxilary C-callbacks */
+
+static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2)
+{
+ return memcmp(key1, key2, *(uint *) arg);
+}
+
+
+static int item_sum_distinct_walk(void *element, element_count num_of_dups,
+ void *item)
+{
+ return ((Aggregator_distinct*) (item))->unique_walk_function(element);
+}
+
+C_MODE_END
+
+/***************************************************************************/
+/**
+ Called before feeding the first row. Used to allocate/setup
+ the internal structures used for aggregation.
+
+ @param thd Thread descriptor
+ @return status
+ @retval FALSE success
+ @retval TRUE faliure
+
+ Prepares Aggregator_distinct to process the incoming stream.
+ Creates the temporary table and the Unique class if needed.
+ Called by Item_sum::aggregator_setup()
+*/
+
+bool Aggregator_distinct::setup(THD *thd)
+{
+ endup_done= FALSE;
+ /*
+ Setup can be called twice for ROLLUP items. This is a bug.
+ Please add DBUG_ASSERT(tree == 0) here when it's fixed.
+ */
+ if (tree || table || tmp_table_param)
+ return FALSE;
+
+ if (item_sum->setup(thd))
+ return TRUE;
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ List<Item> list;
+ SELECT_LEX *select_lex= thd->lex->current_select;
+
+ if (!(tmp_table_param= new TMP_TABLE_PARAM))
+ return TRUE;
+
+ /* Create a table with an unique key over all parameters */
+ for (uint i=0; i < item_sum->get_arg_count() ; i++)
+ {
+ Item *item=item_sum->get_arg(i);
+ if (list.push_back(item))
+ return TRUE; // End of memory
+ if (item->const_item() && item->is_null())
+ always_null=1;
+ }
+ if (always_null)
+ return FALSE;
+ count_field_types(select_lex,tmp_table_param,list,0);
+ tmp_table_param->force_copy_fields= item_sum->force_copy_fields;
+ DBUG_ASSERT(table == 0);
+ /*
+ Make create_tmp_table() convert BIT columns to BIGINT.
+ This is needed because BIT fields store parts of their data in table's
+ null bits, and we don't have methods to compare two table records, which
+ is needed by Unique which is used when HEAP table is used.
+ */
+ {
+ List_iterator_fast<Item> li(list);
+ Item *item;
+ while ((item= li++))
+ {
+ if (item->type() == Item::FIELD_ITEM &&
+ ((Item_field*)item)->field->type() == FIELD_TYPE_BIT)
+ item->marker=4;
+ }
+ }
+ if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
+ 0,
+ (select_lex->options | thd->options),
+ HA_POS_ERROR, (char*)"")))
+ return TRUE;
+ table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
+ table->no_rows=1;
+
+ if (table->s->db_type() == heap_hton)
+ {
+ /*
+ No blobs, otherwise it would have been MyISAM: set up a compare
+ function and its arguments to use with Unique.
+ */
+ qsort_cmp2 compare_key;
+ void* cmp_arg;
+ Field **field= table->field;
+ Field **field_end= field + table->s->fields;
+ bool all_binary= TRUE;
+
+ for (tree_key_length= 0; field < field_end; ++field)
+ {
+ Field *f= *field;
+ enum enum_field_types type= f->type();
+ tree_key_length+= f->pack_length();
+ if ((type == MYSQL_TYPE_VARCHAR) ||
+ (!f->binary() && (type == MYSQL_TYPE_STRING ||
+ type == MYSQL_TYPE_VAR_STRING)))
+ {
+ all_binary= FALSE;
+ break;
+ }
+ }
+ if (all_binary)
+ {
+ cmp_arg= (void*) &tree_key_length;
+ compare_key= (qsort_cmp2) simple_raw_key_cmp;
+ }
+ else
+ {
+ if (table->s->fields == 1)
+ {
+ /*
+ If we have only one field, which is the most common use of
+ count(distinct), it is much faster to use a simpler key
+ compare method that can take advantage of not having to worry
+ about other fields.
+ */
+ compare_key= (qsort_cmp2) simple_str_key_cmp;
+ cmp_arg= (void*) table->field[0];
+ /* tree_key_length has been set already */
+ }
+ else
+ {
+ uint32 *length;
+ compare_key= (qsort_cmp2) composite_key_cmp;
+ cmp_arg= (void*) this;
+ field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
+ for (tree_key_length= 0, length= field_lengths, field= table->field;
+ field < field_end; ++field, ++length)
+ {
+ *length= (*field)->pack_length();
+ tree_key_length+= *length;
+ }
+ }
+ }
+ DBUG_ASSERT(tree == 0);
+ tree= new Unique(compare_key, cmp_arg, tree_key_length,
+ thd->variables.max_heap_table_size);
+ /*
+ The only time tree_key_length could be 0 is if someone does
+ count(distinct) on a char(0) field - stupid thing to do,
+ but this has to be handled - otherwise someone can crash
+ the server with a DoS attack
+ */
+ if (! tree)
+ return TRUE;
+ }
+ return FALSE;
+ }
+ else
+ {
+ List<Create_field> field_list;
+ Create_field field_def; /* field definition */
+ Item *arg;
+ DBUG_ENTER("Item_sum_distinct::setup");
+ /* It's legal to call setup() more than once when in a subquery */
+ if (tree)
+ return FALSE;
+
+ /*
+ Virtual table and the tree are created anew on each re-execution of
+ PS/SP. Hence all further allocations are performed in the runtime
+ mem_root.
+ */
+ if (field_list.push_back(&field_def))
+ return TRUE;
+
+ item_sum->null_value= item_sum->maybe_null= 1;
+ item_sum->quick_group= 0;
+
+ DBUG_ASSERT(item_sum->get_arg(0)->fixed);
+
+ arg = item_sum->get_arg(0);
+ if (arg->const_item())
+ {
+ (void) arg->val_int();
+ if (arg->null_value)
+ always_null=1;
+ }
+
+ if (always_null)
+ return FALSE;
+
+ enum enum_field_types field_type;
+
+ field_type= calc_tmp_field_type(arg->field_type(),
+ arg->result_type());
+ field_def.init_for_tmp_table(field_type,
+ arg->max_length,
+ arg->decimals,
+ arg->maybe_null,
+ arg->unsigned_flag);
+
+ if (! (table= create_virtual_tmp_table(thd, field_list)))
+ return TRUE;
+
+ /* XXX: check that the case of CHAR(0) works OK */
+ tree_key_length= table->s->reclength - table->s->null_bytes;
+
+ /*
+ Unique handles all unique elements in a tree until they can't fit
+ in. Then the tree is dumped to the temporary file. We can use
+ simple_raw_key_cmp because the table contains numbers only; decimals
+ are converted to binary representation as well.
+ */
+ tree= new Unique(simple_raw_key_cmp, &tree_key_length, tree_key_length,
+ thd->variables.max_heap_table_size);
+
+ DBUG_RETURN(tree == 0);
+ }
+}
+
+
+/**
+ Invalidate calculated value and clear the distinct rows.
+
+ Frees space used by the internal data structures.
+ Removes the accumulated distinct rows. Invalidates the calculated result.
+*/
+
+void Aggregator_distinct::clear()
+{
+ endup_done= FALSE;
+ item_sum->clear();
+ if (tree)
+ tree->reset();
+ /* tree and table can be both null only if always_null */
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ if (!tree && table)
+ {
+ table->file->extra(HA_EXTRA_NO_CACHE);
+ table->file->ha_delete_all_rows();
+ table->file->extra(HA_EXTRA_WRITE_CACHE);
+ }
+ }
+ else
+ {
+ item_sum->null_value= 1;
+ }
+}
+
+
+/**
+ Process incoming row.
+
+ Add it to Unique/temp hash table if it's unique. Skip the row if
+ not unique.
+ Prepare Aggregator_distinct to process the incoming stream.
+ Create the temporary table and the Unique class if needed.
+ Called by Item_sum::aggregator_add().
+ To actually get the result value in item_sum's buffers
+ Aggregator_distinct::endup() must be called.
+
+ @return status
+ @retval FALSE success
+ @retval TRUE failure
+*/
+
+bool Aggregator_distinct::add()
+{
+ if (always_null)
+ return 0;
+
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ int error;
+ copy_fields(tmp_table_param);
+ copy_funcs(tmp_table_param->items_to_copy);
+
+ for (Field **field=table->field ; *field ; field++)
+ if ((*field)->is_real_null(0))
+ return 0; // Don't count NULL
+
+ if (tree)
+ {
+ /*
+ The first few bytes of record (at least one) are just markers
+ for deleted and NULLs. We want to skip them since they will
+ bloat the tree without providing any valuable info. Besides,
+ key_length used to initialize the tree didn't include space for them.
+ */
+ return tree->unique_add(table->record[0] + table->s->null_bytes);
+ }
+ if ((error= table->file->ha_write_row(table->record[0])) &&
+ table->file->is_fatal_error(error, HA_CHECK_DUP))
+ return TRUE;
+ return FALSE;
+ }
+ else
+ {
+ item_sum->get_arg(0)->save_in_field(table->field[0], FALSE);
+ if (table->field[0]->is_null())
+ return 0;
+ DBUG_ASSERT(tree);
+ item_sum->null_value= 0;
+ /*
+ '0' values are also stored in the tree. This doesn't matter
+ for SUM(DISTINCT), but is important for AVG(DISTINCT)
+ */
+ return tree->unique_add(table->field[0]->ptr);
+ }
+}
+
+
+/**
+ Calculate the aggregate function value.
+
+ Since Distinct_aggregator::add() just collects the distinct rows,
+ we must go over the distinct rows and feed them to the aggregation
+ function before returning its value.
+ This is what endup () does. It also sets the result validity flag
+ endup_done to TRUE so it will not recalculate the aggregate value
+ again if the Item_sum hasn't been reset.
+*/
+
+void Aggregator_distinct::endup()
+{
+ /* prevent consecutive recalculations */
+ if (endup_done)
+ return;
+
+ /* we are going to calculate the aggregate value afresh */
+ item_sum->clear();
+
+ /* The result will definitely be null : no more calculations needed */
+ if (always_null)
+ return;
+
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ DBUG_ASSERT(item_sum->fixed == 1);
+ Item_sum_count *sum= (Item_sum_count *)item_sum;
+ if (tree && tree->elements == 0)
+ {
+ /* everything fits in memory */
+ sum->count= (longlong) tree->elements_in_tree();
+ endup_done= TRUE;
+ }
+ if (!tree)
+ {
+ /* there were blobs */
+ table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+ sum->count= table->file->stats.records;
+ endup_done= TRUE;
+ }
+ }
+ else
+ {
+ /*
+ We don't have a tree only if 'setup()' hasn't been called;
+ this is the case of sql_select.cc:return_zero_rows.
+ */
+ if (tree)
+ table->field[0]->set_notnull();
+ }
+
+ if (tree && !endup_done)
+ {
+ /* go over the tree of distinct keys and calculate the aggregate value */
+ use_distinct_values= TRUE;
+ tree->walk(item_sum_distinct_walk, (void*) this);
+ use_distinct_values= FALSE;
+ }
+ /* prevent consecutive recalculations */
+ endup_done= TRUE;
+}
+
+
String *
Item_sum_num::val_str(String *str)
{
@@ -823,10 +1328,27 @@ void Item_sum_sum::fix_length_and_dec()
bool Item_sum_sum::add()
{
DBUG_ENTER("Item_sum_sum::add");
+ bool arg_is_null;
if (hybrid_type == DECIMAL_RESULT)
{
- my_decimal value, *val= args[0]->val_decimal(&value);
- if (!args[0]->null_value)
+ my_decimal value, *val;
+ if (aggr->use_distinct_values)
+ {
+ /*
+ We are aggregating distinct rows. Get the value from the distinct
+ table pointer
+ */
+ Aggregator_distinct *daggr= (Aggregator_distinct *)aggr;
+ val= daggr->table->field[0]->val_decimal (&value);
+ arg_is_null= daggr->table->field[0]->is_null();
+ }
+ else
+ {
+ /* non-distinct aggregation */
+ val= args[0]->val_decimal(&value);
+ arg_is_null= args[0]->null_value;
+ }
+ if (!arg_is_null)
{
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1),
val, dec_buffs + curr_dec_buff);
@@ -836,8 +1358,25 @@ bool Item_sum_sum::add()
}
else
{
- sum+= args[0]->val_real();
- if (!args[0]->null_value)
+ double val;
+ if (aggr->use_distinct_values)
+ {
+ /*
+ We are aggregating distinct rows. Get the value from the distinct
+ table pointer
+ */
+ Aggregator_distinct *daggr= (Aggregator_distinct *)aggr;
+ val= daggr->table->field[0]->val_real ();
+ arg_is_null= daggr->table->field[0]->is_null();
+ }
+ else
+ {
+ /* non-distinct aggregation */
+ val= args[0]->val_real();
+ arg_is_null= args[0]->null_value;
+ }
+ sum+= val;
+ if (!arg_is_null)
null_value= 0;
}
DBUG_RETURN(0);
@@ -847,6 +1386,8 @@ bool Item_sum_sum::add()
longlong Item_sum_sum::val_int()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
{
longlong result;
@@ -861,6 +1402,8 @@ longlong Item_sum_sum::val_int()
double Item_sum_sum::val_real()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
my_decimal2double(E_DEC_FATAL_ERROR, dec_buffs + curr_dec_buff, &sum);
return sum;
@@ -869,6 +1412,8 @@ double Item_sum_sum::val_real()
String *Item_sum_sum::val_str(String *str)
{
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
return val_string_from_decimal(str);
return val_string_from_real(str);
@@ -877,311 +1422,54 @@ String *Item_sum_sum::val_str(String *str)
my_decimal *Item_sum_sum::val_decimal(my_decimal *val)
{
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
return (dec_buffs + curr_dec_buff);
return val_decimal_from_real(val);
}
-/***************************************************************************/
-
-C_MODE_START
-
-/* Declarations for auxilary C-callbacks */
-
-static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2)
-{
- return memcmp(key1, key2, *(uint *) arg);
-}
-
-
-static int item_sum_distinct_walk(void *element, element_count num_of_dups,
- void *item)
-{
- return ((Item_sum_distinct*) (item))->unique_walk_function(element);
-}
-
-C_MODE_END
-
-/* Item_sum_distinct */
-
-Item_sum_distinct::Item_sum_distinct(Item *item_arg)
- :Item_sum_num(item_arg), tree(0)
-{
- /*
- quick_group is an optimizer hint, which means that GROUP BY can be
- handled with help of index on grouped columns.
- By setting quick_group to zero we force creation of temporary table
- to perform GROUP BY.
- */
- quick_group= 0;
-}
-
-
-Item_sum_distinct::Item_sum_distinct(THD *thd, Item_sum_distinct *original)
- :Item_sum_num(thd, original), val(original->val), tree(0),
- table_field_type(original->table_field_type)
-{
- quick_group= 0;
-}
-
-
/**
- Behaves like an Integer except to fix_length_and_dec().
- Additionally div() converts val with this traits to a val with true
- decimal traits along with conversion of integer value to decimal value.
- This is to speedup SUM/AVG(DISTINCT) evaluation for 8-32 bit integer
- values.
+ Aggregate a distinct row from the distinct hash table.
+
+ Called for each row into the hash table 'Aggregator_distinct::table'.
+ Includes the current distinct row into the calculation of the
+ aggregate value. Uses the Field classes to get the value from the row.
+ This function is used for AVG/SUM(DISTINCT). For COUNT(DISTINCT)
+ it's called only when there are no blob arguments and the data don't
+ fit into memory (so Unique makes persisted trees on disk).
+
+ @param element pointer to the row data.
+
+ @return status
+ @retval FALSE success
+ @retval TRUE failure
*/
-struct Hybrid_type_traits_fast_decimal: public
- Hybrid_type_traits_integer
-{
- virtual Item_result type() const { return DECIMAL_RESULT; }
- virtual void fix_length_and_dec(Item *item, Item *arg) const
- { Hybrid_type_traits_decimal::instance()->fix_length_and_dec(item, arg); }
-
- virtual void div(Hybrid_type *val, ulonglong u) const
- {
- int2my_decimal(E_DEC_FATAL_ERROR, val->integer, 0, val->dec_buf);
- val->used_dec_buf_no= 0;
- val->traits= Hybrid_type_traits_decimal::instance();
- val->traits->div(val, u);
- }
- static const Hybrid_type_traits_fast_decimal *instance();
- Hybrid_type_traits_fast_decimal() {};
-};
-
-static const Hybrid_type_traits_fast_decimal fast_decimal_traits_instance;
-
-const Hybrid_type_traits_fast_decimal
- *Hybrid_type_traits_fast_decimal::instance()
-{
- return &fast_decimal_traits_instance;
-}
-
-void Item_sum_distinct::fix_length_and_dec()
+
+bool Aggregator_distinct::unique_walk_function(void *element)
{
- DBUG_ASSERT(args[0]->fixed);
-
- table_field_type= args[0]->field_type();
-
- /* Adjust tmp table type according to the chosen aggregation type */
- switch (args[0]->result_type()) {
- case STRING_RESULT:
- case REAL_RESULT:
- val.traits= Hybrid_type_traits::instance();
- if (table_field_type != MYSQL_TYPE_FLOAT)
- table_field_type= MYSQL_TYPE_DOUBLE;
- break;
- case INT_RESULT:
- /*
- Preserving int8, int16, int32 field types gives ~10% performance boost
- as the size of result tree becomes significantly smaller.
- Another speed up we gain by using longlong for intermediate
- calculations. The range of int64 is enough to hold sum 2^32 distinct
- integers each <= 2^32.
- */
- if (table_field_type == MYSQL_TYPE_INT24 ||
- (table_field_type >= MYSQL_TYPE_TINY &&
- table_field_type <= MYSQL_TYPE_LONG))
- {
- val.traits= Hybrid_type_traits_fast_decimal::instance();
- break;
- }
- table_field_type= MYSQL_TYPE_LONGLONG;
- /* fallthrough */
- case DECIMAL_RESULT:
- val.traits= Hybrid_type_traits_decimal::instance();
- if (table_field_type != MYSQL_TYPE_LONGLONG)
- table_field_type= MYSQL_TYPE_NEWDECIMAL;
- break;
- case ROW_RESULT:
- default:
- DBUG_ASSERT(0);
- }
- val.traits->fix_length_and_dec(this, args[0]);
+ memcpy(table->field[0]->ptr, element, tree_key_length);
+ item_sum->add();
+ return 0;
}
-/**
- @todo
- check that the case of CHAR(0) works OK
-*/
-bool Item_sum_distinct::setup(THD *thd)
+Aggregator_distinct::~Aggregator_distinct()
{
- List<Create_field> field_list;
- Create_field field_def; /* field definition */
- DBUG_ENTER("Item_sum_distinct::setup");
- /* It's legal to call setup() more than once when in a subquery */
if (tree)
- DBUG_RETURN(FALSE);
-
- /*
- Virtual table and the tree are created anew on each re-execution of
- PS/SP. Hence all further allocations are performed in the runtime
- mem_root.
- */
- if (field_list.push_back(&field_def))
- DBUG_RETURN(TRUE);
-
- null_value= maybe_null= 1;
- quick_group= 0;
-
- DBUG_ASSERT(args[0]->fixed);
-
- field_def.init_for_tmp_table(table_field_type, args[0]->max_length,
- args[0]->decimals, args[0]->maybe_null,
- args[0]->unsigned_flag);
-
- if (! (table= create_virtual_tmp_table(thd, field_list)))
- DBUG_RETURN(TRUE);
-
- /* XXX: check that the case of CHAR(0) works OK */
- tree_key_length= table->s->reclength - table->s->null_bytes;
-
- /*
- Unique handles all unique elements in a tree until they can't fit
- in. Then the tree is dumped to the temporary file. We can use
- simple_raw_key_cmp because the table contains numbers only; decimals
- are converted to binary representation as well.
- */
- tree= new Unique(simple_raw_key_cmp, &tree_key_length, tree_key_length,
- thd->variables.max_heap_table_size);
-
- is_evaluated= FALSE;
- DBUG_RETURN(tree == 0);
-}
-
-
-bool Item_sum_distinct::add()
-{
- args[0]->save_in_field(table->field[0], FALSE);
- is_evaluated= FALSE;
- if (!table->field[0]->is_null())
{
- DBUG_ASSERT(tree);
- null_value= 0;
- /*
- '0' values are also stored in the tree. This doesn't matter
- for SUM(DISTINCT), but is important for AVG(DISTINCT)
- */
- return tree->unique_add(table->field[0]->ptr);
+ delete tree;
+ tree= NULL;
}
- return 0;
-}
-
-
-bool Item_sum_distinct::unique_walk_function(void *element)
-{
- memcpy(table->field[0]->ptr, element, tree_key_length);
- ++count;
- val.traits->add(&val, table->field[0]);
- return 0;
-}
-
-
-void Item_sum_distinct::clear()
-{
- DBUG_ENTER("Item_sum_distinct::clear");
- DBUG_ASSERT(tree != 0); /* we always have a tree */
- null_value= 1;
- tree->reset();
- is_evaluated= FALSE;
- DBUG_VOID_RETURN;
-}
-
-void Item_sum_distinct::cleanup()
-{
- Item_sum_num::cleanup();
- delete tree;
- tree= 0;
- table= 0;
- is_evaluated= FALSE;
-}
-
-Item_sum_distinct::~Item_sum_distinct()
-{
- delete tree;
- /* no need to free the table */
-}
-
-
-void Item_sum_distinct::calculate_val_and_count()
-{
- if (!is_evaluated)
+ if (table)
{
- count= 0;
- val.traits->set_zero(&val);
- /*
- We don't have a tree only if 'setup()' hasn't been called;
- this is the case of sql_select.cc:return_zero_rows.
- */
- if (tree)
- {
- table->field[0]->set_notnull();
- tree->walk(item_sum_distinct_walk, (void*) this);
- }
- is_evaluated= TRUE;
+ free_tmp_table(table->in_use, table);
+ table=NULL;
}
-}
-
-
-double Item_sum_distinct::val_real()
-{
- calculate_val_and_count();
- return val.traits->val_real(&val);
-}
-
-
-my_decimal *Item_sum_distinct::val_decimal(my_decimal *to)
-{
- calculate_val_and_count();
- if (null_value)
- return 0;
- return val.traits->val_decimal(&val, to);
-}
-
-
-longlong Item_sum_distinct::val_int()
-{
- calculate_val_and_count();
- return val.traits->val_int(&val, unsigned_flag);
-}
-
-
-String *Item_sum_distinct::val_str(String *str)
-{
- calculate_val_and_count();
- if (null_value)
- return 0;
- return val.traits->val_str(&val, str, decimals);
-}
-
-/* end of Item_sum_distinct */
-
-/* Item_sum_avg_distinct */
-
-void
-Item_sum_avg_distinct::fix_length_and_dec()
-{
- Item_sum_distinct::fix_length_and_dec();
- prec_increment= current_thd->variables.div_precincrement;
- /*
- AVG() will divide val by count. We need to reserve digits
- after decimal point as the result can be fractional.
- */
- decimals= min(decimals + prec_increment, NOT_FIXED_DEC);
-}
-
-
-void
-Item_sum_avg_distinct::calculate_val_and_count()
-{
- if (!is_evaluated)
+ if (tmp_table_param)
{
- Item_sum_distinct::calculate_val_and_count();
- if (count)
- val.traits->div(&val, count);
- is_evaluated= TRUE;
+ delete tmp_table_param;
+ tmp_table_param= NULL;
}
}
@@ -1208,6 +1496,8 @@ bool Item_sum_count::add()
longlong Item_sum_count::val_int()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
return (longlong) count;
}
@@ -1298,6 +1588,8 @@ bool Item_sum_avg::add()
double Item_sum_avg::val_real()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (!count)
{
null_value=1;
@@ -1312,6 +1604,8 @@ my_decimal *Item_sum_avg::val_decimal(my_decimal *val)
my_decimal sum_buff, cnt;
const my_decimal *sum_dec;
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (!count)
{
null_value=1;
@@ -1334,6 +1628,8 @@ my_decimal *Item_sum_avg::val_decimal(my_decimal *val)
String *Item_sum_avg::val_str(String *str)
{
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
return val_string_from_decimal(str);
return val_string_from_real(str);
@@ -2029,6 +2325,7 @@ void Item_sum_hybrid::reset_field()
void Item_sum_sum::reset_field()
{
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
@@ -2053,6 +2350,7 @@ void Item_sum_count::reset_field()
{
uchar *res=result_field->ptr;
longlong nr=0;
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (!args[0]->maybe_null || !args[0]->is_null())
nr=1;
@@ -2063,6 +2361,7 @@ void Item_sum_count::reset_field()
void Item_sum_avg::reset_field()
{
uchar *res=result_field->ptr;
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (hybrid_type == DECIMAL_RESULT)
{
longlong tmp;
@@ -2116,6 +2415,7 @@ void Item_sum_bit::update_field()
void Item_sum_sum::update_field()
{
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
@@ -2168,6 +2468,9 @@ void Item_sum_avg::update_field()
{
longlong field_count;
uchar *res=result_field->ptr;
+
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
+
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
@@ -2470,318 +2773,6 @@ double Item_variance_field::val_real()
/****************************************************************************
-** COUNT(DISTINCT ...)
-****************************************************************************/
-
-int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2)
-{
- Field *f= (Field*) arg;
- return f->cmp(key1, key2);
-}
-
-/**
- Did not make this one static - at least gcc gets confused when
- I try to declare a static function as a friend. If you can figure
- out the syntax to make a static function a friend, make this one
- static
-*/
-
-int composite_key_cmp(void* arg, uchar* key1, uchar* key2)
-{
- Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
- Field **field = item->table->field;
- Field **field_end= field + item->table->s->fields;
- uint32 *lengths=item->field_lengths;
- for (; field < field_end; ++field)
- {
- Field* f = *field;
- int len = *lengths++;
- int res = f->cmp(key1, key2);
- if (res)
- return res;
- key1 += len;
- key2 += len;
- }
- return 0;
-}
-
-
-C_MODE_START
-
-static int count_distinct_walk(void *elem, element_count count, void *arg)
-{
- (*((ulonglong*)arg))++;
- return 0;
-}
-
-C_MODE_END
-
-
-void Item_sum_count_distinct::cleanup()
-{
- DBUG_ENTER("Item_sum_count_distinct::cleanup");
- Item_sum_int::cleanup();
-
- /* Free objects only if we own them. */
- if (!original)
- {
- /*
- We need to delete the table and the tree in cleanup() as
- they were allocated in the runtime memroot. Using the runtime
- memroot reduces memory footprint for PS/SP and simplifies setup().
- */
- delete tree;
- tree= 0;
- is_evaluated= FALSE;
- if (table)
- {
- free_tmp_table(table->in_use, table);
- table= 0;
- }
- delete tmp_table_param;
- tmp_table_param= 0;
- }
- always_null= FALSE;
- DBUG_VOID_RETURN;
-}
-
-
-/**
- This is used by rollup to create a separate usable copy of
- the function.
-*/
-
-void Item_sum_count_distinct::make_unique()
-{
- table=0;
- original= 0;
- force_copy_fields= 1;
- tree= 0;
- is_evaluated= FALSE;
- tmp_table_param= 0;
- always_null= FALSE;
-}
-
-
-Item_sum_count_distinct::~Item_sum_count_distinct()
-{
- cleanup();
-}
-
-
-bool Item_sum_count_distinct::setup(THD *thd)
-{
- List<Item> list;
- SELECT_LEX *select_lex= thd->lex->current_select;
-
- /*
- Setup can be called twice for ROLLUP items. This is a bug.
- Please add DBUG_ASSERT(tree == 0) here when it's fixed.
- It's legal to call setup() more than once when in a subquery
- */
- if (tree || table || tmp_table_param)
- return FALSE;
-
- if (!(tmp_table_param= new TMP_TABLE_PARAM))
- return TRUE;
-
- /* Create a table with an unique key over all parameters */
- for (uint i=0; i < arg_count ; i++)
- {
- Item *item=args[i];
- if (list.push_back(item))
- return TRUE; // End of memory
- if (item->const_item() && item->is_null())
- always_null= 1;
- }
- if (always_null)
- return FALSE;
- count_field_types(select_lex, tmp_table_param, list, 0);
- tmp_table_param->force_copy_fields= force_copy_fields;
- DBUG_ASSERT(table == 0);
- /*
- Make create_tmp_table() convert BIT columns to BIGINT.
- This is needed because BIT fields store parts of their data in table's
- null bits, and we don't have methods to compare two table records, which
- is needed by Unique which is used when HEAP table is used.
- */
- {
- List_iterator_fast<Item> li(list);
- Item *item;
- while ((item= li++))
- {
- if (item->type() == Item::FIELD_ITEM &&
- ((Item_field*)item)->field->type() == FIELD_TYPE_BIT)
- item->marker=4;
- }
- }
-
- if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
- 0,
- (select_lex->options | thd->options),
- HA_POS_ERROR, (char*)"")))
- return TRUE;
- table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
- table->no_rows=1;
-
- if (table->s->db_type() == heap_hton)
- {
- /*
- No blobs, otherwise it would have been MyISAM: set up a compare
- function and its arguments to use with Unique.
- */
- qsort_cmp2 compare_key;
- void* cmp_arg;
- Field **field= table->field;
- Field **field_end= field + table->s->fields;
- bool all_binary= TRUE;
-
- for (tree_key_length= 0; field < field_end; ++field)
- {
- Field *f= *field;
- enum enum_field_types f_type= f->type();
- tree_key_length+= f->pack_length();
- if ((f_type == MYSQL_TYPE_VARCHAR) ||
- (!f->binary() && (f_type == MYSQL_TYPE_STRING ||
- f_type == MYSQL_TYPE_VAR_STRING)))
- {
- all_binary= FALSE;
- break;
- }
- }
- if (all_binary)
- {
- cmp_arg= (void*) &tree_key_length;
- compare_key= (qsort_cmp2) simple_raw_key_cmp;
- }
- else
- {
- if (table->s->fields == 1)
- {
- /*
- If we have only one field, which is the most common use of
- count(distinct), it is much faster to use a simpler key
- compare method that can take advantage of not having to worry
- about other fields.
- */
- compare_key= (qsort_cmp2) simple_str_key_cmp;
- cmp_arg= (void*) table->field[0];
- /* tree_key_length has been set already */
- }
- else
- {
- uint32 *length;
- compare_key= (qsort_cmp2) composite_key_cmp;
- cmp_arg= (void*) this;
- field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
- for (tree_key_length= 0, length= field_lengths, field= table->field;
- field < field_end; ++field, ++length)
- {
- *length= (*field)->pack_length();
- tree_key_length+= *length;
- }
- }
- }
- DBUG_ASSERT(tree == 0);
- tree= new Unique(compare_key, cmp_arg, tree_key_length,
- thd->variables.max_heap_table_size);
- /*
- The only time tree_key_length could be 0 is if someone does
- count(distinct) on a char(0) field - stupid thing to do,
- but this has to be handled - otherwise someone can crash
- the server with a DoS attack
- */
- is_evaluated= FALSE;
- if (! tree)
- return TRUE;
- }
- return FALSE;
-}
-
-
-Item *Item_sum_count_distinct::copy_or_same(THD* thd)
-{
- return new (thd->mem_root) Item_sum_count_distinct(thd, this);
-}
-
-
-void Item_sum_count_distinct::clear()
-{
- /* tree and table can be both null only if always_null */
- is_evaluated= FALSE;
- if (tree)
- {
- tree->reset();
- }
- else if (table)
- {
- table->file->extra(HA_EXTRA_NO_CACHE);
- table->file->ha_delete_all_rows();
- table->file->extra(HA_EXTRA_WRITE_CACHE);
- }
-}
-
-bool Item_sum_count_distinct::add()
-{
- int error;
- if (always_null)
- return 0;
- copy_fields(tmp_table_param);
- copy_funcs(tmp_table_param->items_to_copy);
-
- for (Field **field=table->field ; *field ; field++)
- if ((*field)->is_real_null(0))
- return 0; // Don't count NULL
-
- is_evaluated= FALSE;
- if (tree)
- {
- /*
- The first few bytes of record (at least one) are just markers
- for deleted and NULLs. We want to skip them since they will
- bloat the tree without providing any valuable info. Besides,
- key_length used to initialize the tree didn't include space for them.
- */
- return tree->unique_add(table->record[0] + table->s->null_bytes);
- }
- if ((error= table->file->ha_write_row(table->record[0])) &&
- table->file->is_fatal_error(error, HA_CHECK_DUP))
- return TRUE;
- return FALSE;
-}
-
-
-longlong Item_sum_count_distinct::val_int()
-{
- int error;
- DBUG_ASSERT(fixed == 1);
- if (!table) // Empty query
- return LL(0);
- if (tree)
- {
- if (is_evaluated)
- return count;
-
- if (tree->elements == 0)
- return (longlong) tree->elements_in_tree(); // everything fits in memory
- count= 0;
- tree->walk(count_distinct_walk, (void*) &count);
- is_evaluated= TRUE;
- return (longlong) count;
- }
-
- error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
-
- if(error)
- {
- table->file->print_error(error, MYF(0));
- }
-
- return table->file->stats.records;
-}
-
-
-/****************************************************************************
** Functions to handle dynamic loadable aggregates
** Original source by: Alexis Mikhailov <root@medinf.chuvashia.su>
** Adapted for UDAs by: Andreas F. Bobak <bobak@relog.ch>.
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 8a20e2dd165..1dd1c5b5dd5 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -22,7 +22,87 @@
#include <my_tree.h>
-/*
+class Item_sum;
+class Aggregator_distinct;
+class Aggregator_simple;
+
+/**
+ The abstract base class for the Aggregator_* classes.
+ It implements the data collection functions (setup/add/clear)
+ as either pass-through to the real functionality or
+ as collectors into an Unique (for distinct) structure.
+
+ Note that update_field/reset_field are not in that
+ class, because they're simply not called when
+ GROUP BY/DISTINCT can be handled with help of index on grouped
+ fields (quick_group = 0);
+*/
+
+class Aggregator : public Sql_alloc
+{
+ friend class Item_sum;
+ friend class Item_sum_sum;
+ friend class Item_sum_count;
+ friend class Item_sum_avg;
+
+ /*
+ All members are protected as this class is not usable outside of an
+ Item_sum descendant.
+ */
+protected:
+ /* the aggregate function class to act on */
+ Item_sum *item_sum;
+
+ /**
+ When feeding back the data in endup() from Unique/temp table back to
+ Item_sum::add() methods we must read the data from Unique (and not
+ recalculate the functions that are given as arguments to the aggregate
+ function.
+ This flag is to tell the add() methods to take the data from the Unique
+ instead by calling the relevant val_..() method
+ */
+
+ bool use_distinct_values;
+
+public:
+ Aggregator (Item_sum *arg): item_sum(arg), use_distinct_values(FALSE) {}
+ virtual ~Aggregator () {} /* Keep gcc happy */
+
+ enum Aggregator_type { SIMPLE_AGGREGATOR, DISTINCT_AGGREGATOR };
+ virtual Aggregator_type Aggrtype() = 0;
+
+ /**
+ Called before adding the first row.
+ Allocates and sets up the internal aggregation structures used,
+ e.g. the Unique instance used to calculate distinct.
+ */
+ virtual bool setup(THD *) = 0;
+
+ /**
+ Called when we need to wipe out all the data from the aggregator :
+ all the values acumulated and all the state.
+ Cleans up the internal structures and resets them to their initial state.
+ */
+ virtual void clear() = 0;
+
+ /**
+ Called when there's a new value to be aggregated.
+ Updates the internal state of the aggregator to reflect the new value.
+ */
+ virtual bool add() = 0;
+
+ /**
+ Called when there are no more data and the final value is to be retrieved.
+ Finalises the state of the aggregator, so the final result can be retrieved.
+ */
+ virtual void endup() = 0;
+
+};
+
+
+class st_select_lex;
+
+/**
Class Item_sum is the base class used for special expressions that SQL calls
'set functions'. These expressions are formed with the help of aggregate
functions such as SUM, MAX, GROUP_CONCAT etc.
@@ -215,13 +295,32 @@
TODO: to catch queries where the limit is exceeded to make the
code clean here.
-*/
-
-class st_select_lex;
+*/
class Item_sum :public Item_result_field
{
public:
+ /**
+ Aggregator class instance. Not set initially. Allocated only after
+ it is determined if the incoming data are already distinct.
+ */
+ Aggregator *aggr;
+
+ /**
+ Used in making ROLLUP. Set for the ROLLUP copies of the original
+ Item_sum and passed to create_tmp_field() to cause it to work
+ over the temp table buffer that is referenced by
+ Item_result_field::result_field.
+ */
+ bool force_copy_fields;
+
+ /**
+ Indicates how the aggregate function was specified by the parser :
+ 1 if it was written as AGGREGATE(DISTINCT),
+ 0 if it was AGGREGATE()
+ */
+ bool with_distinct;
+
enum Sumfunctype
{ COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
@@ -263,47 +362,28 @@ public:
Item_sum() :quick_group(1), arg_count(0), forced_const(FALSE)
{
mark_as_sum_func();
+ init_aggregator();
}
Item_sum(Item *a) :quick_group(1), arg_count(1), args(tmp_args),
orig_args(tmp_orig_args), forced_const(FALSE)
{
args[0]=a;
mark_as_sum_func();
+ init_aggregator();
}
Item_sum( Item *a, Item *b ) :quick_group(1), arg_count(2), args(tmp_args),
orig_args(tmp_orig_args), forced_const(FALSE)
{
args[0]=a; args[1]=b;
mark_as_sum_func();
+ init_aggregator();
}
Item_sum(List<Item> &list);
//Copy constructor, need to perform subselects with temporary tables
Item_sum(THD *thd, Item_sum *item);
enum Type type() const { return SUM_FUNC_ITEM; }
virtual enum Sumfunctype sum_func () const=0;
-
- /*
- This method is similar to add(), but it is called when the current
- aggregation group changes. Thus it performs a combination of
- clear() and add().
- */
- inline bool reset() { clear(); return add(); };
-
- /*
- Prepare this item for evaluation of an aggregate value. This is
- called by reset() when a group changes, or, for correlated
- subqueries, between subquery executions. E.g. for COUNT(), this
- method should set count= 0;
- */
- virtual void clear()= 0;
-
- /*
- This method is called for the next row in the same group. Its
- purpose is to aggregate the new value to the previous values in
- the group (i.e. since clear() was called last time). For example,
- for COUNT(), do count++.
- */
- virtual bool add()=0;
+ inline bool reset() { aggregator_clear(); return aggregator_add(); };
/*
Called when new group is started and results are being saved in
@@ -343,11 +423,6 @@ public:
{ return new Item_field(field); }
table_map used_tables() const { return used_tables_cache; }
void update_used_tables ();
- void cleanup()
- {
- Item::cleanup();
- forced_const= FALSE;
- }
bool is_null() { return null_value; }
void make_const ()
{
@@ -359,7 +434,9 @@ public:
virtual void print(String *str, enum_query_type query_type);
void fix_num_length_and_dec();
- /*
+ /**
+ Mark an aggregate as having no rows.
+
This function is called by the execution engine to assign 'NO ROWS
FOUND' value to an aggregate item, when the underlying result set
has no rows. Such value, in a general case, may be different from
@@ -367,10 +444,15 @@ public:
may be initialized to 0 by clear() and to NULL by
no_rows_in_result().
*/
- void no_rows_in_result() { clear(); }
-
- virtual bool setup(THD *thd) {return 0;}
- virtual void make_unique() {}
+ void no_rows_in_result()
+ {
+ if (!aggr)
+ set_aggregator(with_distinct ?
+ Aggregator::DISTINCT_AGGREGATOR :
+ Aggregator::SIMPLE_AGGREGATOR);
+ reset();
+ }
+ virtual void make_unique() { force_copy_fields= TRUE; }
Item *get_tmp_table_item(THD *thd);
virtual Field *create_tmp_field(bool group, TABLE *table,
uint convert_blob_length);
@@ -381,14 +463,178 @@ public:
st_select_lex *depended_from()
{ return (nest_level == aggr_level ? 0 : aggr_sel); }
- Item *get_arg(int i) { return args[i]; }
- Item *set_arg(int i, THD *thd, Item *new_val);
+ Item *get_arg(uint i) { return args[i]; }
+ Item *set_arg(uint i, THD *thd, Item *new_val);
uint get_arg_count() { return arg_count; }
+
+ /* Initialization of distinct related members */
+ void init_aggregator()
+ {
+ aggr= NULL;
+ with_distinct= FALSE;
+ force_copy_fields= FALSE;
+ }
+
+ /**
+ Called to initialize the aggregator.
+ */
+
+ inline bool aggregator_setup(THD *thd) { return aggr->setup(thd); };
+
+ /**
+ Called to cleanup the aggregator.
+ */
+
+ inline void aggregator_clear() { aggr->clear(); }
+
+ /**
+ Called to add value to the aggregator.
+ */
+
+ inline bool aggregator_add() { return aggr->add(); };
+
+ /* stores the declared DISTINCT flag (from the parser) */
+ void set_distinct(bool distinct)
+ {
+ with_distinct= distinct;
+ quick_group= with_distinct ? 0 : 1;
+ }
+
+ /**
+ Set the type of aggregation : DISTINCT or not.
+
+ Called when the final determination is done about the aggregation
+ type and the object is about to be used.
+ */
+
+ int set_aggregator(Aggregator::Aggregator_type aggregator);
+ virtual void clear()= 0;
+ virtual bool add()= 0;
+ virtual bool setup(THD *thd) {return 0;}
+
+ void cleanup ();
+};
+
+
+class Unique;
+
+
+/**
+ The distinct aggregator.
+ Implements AGGFN (DISTINCT ..)
+ Collects all the data into an Unique (similarly to what Item_sum_distinct
+ does currently) and then (if applicable) iterates over the list of
+ unique values and pumps them back into its object
+*/
+
+class Aggregator_distinct : public Aggregator
+{
+ friend class Item_sum_sum;
+ friend class Item_sum_count;
+ friend class Item_sum_avg;
+protected:
+
+ /*
+ flag to prevent consecutive runs of endup(). Normally in endup there are
+ expensive calculations (like walking the distinct tree for example)
+ which we must do only once if there are no data changes.
+ We can re-use the data for the second and subsequent val_xxx() calls.
+ endup_done set to TRUE also means that the calculated values for
+ the aggregate functions are correct and don't need recalculation.
+ */
+ bool endup_done;
+
+ /*
+ Used depending on the type of the aggregate function and the presence of
+ blob columns in it:
+ - For COUNT(DISTINCT) and no blob fields this points to a real temporary
+ table. It's used as a hash table.
+ - For AVG/SUM(DISTINCT) or COUNT(DISTINCT) with blob fields only the
+ in-memory data structure of a temporary table is constructed.
+ It's used by the Field classes to transform data into row format.
+ */
+ TABLE *table;
+
+ /*
+ An array of field lengths on row allocated and used only for
+ COUNT(DISTINCT) with multiple columns and no blobs. Used in
+ Aggregator_distinct::composite_key_cmp (called from Unique to compare
+ nodes
+ */
+ uint32 *field_lengths;
+
+ /*
+ used in conjunction with 'table' to support the access to Field classes
+ for COUNT(DISTINCT). Needed by copy_fields()/copy_funcs().
+ */
+ TMP_TABLE_PARAM *tmp_table_param;
+
+ /*
+ If there are no blobs in the COUNT(DISTINCT) arguments, we can use a tree,
+ which is faster than heap table. In that case, we still use the table
+ to help get things set up, but we insert nothing in it.
+ For AVG/SUM(DISTINCT) we always use this tree (as it takes a single
+ argument) to get the distinct rows.
+ */
+ Unique *tree;
+
+ /*
+ The length of the temp table row. Must be a member of the class as it
+ gets passed down to simple_raw_key_cmp () as a compare function argument
+ to Unique. simple_raw_key_cmp () is used as a fast comparison function
+ when the entire row can be binary compared.
+ */
+ uint tree_key_length;
+
+ /*
+ Set to true if the result is known to be always NULL.
+ If set deactivates creation and usage of the temporary table (in the
+ 'table' member) and the Unique instance (in the 'tree' member) as well as
+ the calculation of the final value on the first call to
+ Item_[sum|avg|count]::val_xxx().
+ */
+ bool always_null;
+
+public:
+ Aggregator_distinct (Item_sum *sum) :
+ Aggregator(sum), table(NULL), tmp_table_param(NULL), tree(NULL),
+ always_null(FALSE) {}
+ virtual ~Aggregator_distinct ();
+ Aggregator_type Aggrtype() { return DISTINCT_AGGREGATOR; }
+
+ bool setup(THD *);
+ void clear();
+ bool add();
+ void endup();
+
+ bool unique_walk_function(void *element);
+ static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
+};
+
+
+/**
+ The pass-through aggregator.
+ Implements AGGFN (DISTINCT ..) by knowing it gets distinct data on input.
+ So it just pumps them back to the Item_sum descendant class.
+*/
+class Aggregator_simple : public Aggregator
+{
+public:
+
+ Aggregator_simple (Item_sum *sum) :
+ Aggregator(sum) {}
+ Aggregator_type Aggrtype() { return Aggregator::SIMPLE_AGGREGATOR; }
+
+ bool setup(THD * thd) { return item_sum->setup(thd); }
+ void clear() { item_sum->clear(); }
+ bool add() { return item_sum->add(); }
+ void endup() {};
};
class Item_sum_num :public Item_sum
{
+ friend class Aggregator_distinct;
protected:
/*
val_xxx() functions may be called several times during the execution of a
@@ -443,9 +689,15 @@ protected:
void fix_length_and_dec();
public:
- Item_sum_sum(Item *item_par) :Item_sum_num(item_par) {}
+ Item_sum_sum(Item *item_par, bool distinct= FALSE) :Item_sum_num(item_par)
+ {
+ set_distinct(distinct);
+ }
Item_sum_sum(THD *thd, Item_sum_sum *item);
- enum Sumfunctype sum_func () const {return SUM_FUNC;}
+ enum Sumfunctype sum_func () const
+ {
+ return with_distinct ? SUM_DISTINCT_FUNC : SUM_FUNC;
+ }
void clear();
bool add();
double val_real();
@@ -456,109 +708,50 @@ public:
void reset_field();
void update_field();
void no_rows_in_result() {}
- const char *func_name() const { return "sum("; }
+ const char *func_name() const
+ {
+ return with_distinct ? "sum(distinct " : "sum(";
+ }
Item *copy_or_same(THD* thd);
};
-
-/* Common class for SUM(DISTINCT), AVG(DISTINCT) */
-
-class Unique;
-
-class Item_sum_distinct :public Item_sum_num
+class Item_sum_count :public Item_sum_int
{
-protected:
- /* storage for the summation result */
- ulonglong count;
- Hybrid_type val;
- /* storage for unique elements */
- Unique *tree;
- TABLE *table;
- enum enum_field_types table_field_type;
- uint tree_key_length;
-protected:
- Item_sum_distinct(THD *thd, Item_sum_distinct *item);
-public:
- Item_sum_distinct(Item *item_par);
- ~Item_sum_distinct();
+ longlong count;
+
+ friend class Aggregator_distinct;
- bool setup(THD *thd);
void clear();
- void cleanup();
bool add();
- double val_real();
- my_decimal *val_decimal(my_decimal *);
- longlong val_int();
- String *val_str(String *str);
-
- /* XXX: does it need make_unique? */
-
- enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
- void reset_field() {} // not used
- void update_field() {} // not used
- virtual void no_rows_in_result() {}
- void fix_length_and_dec();
- enum Item_result result_type () const { return val.traits->type(); }
- virtual void calculate_val_and_count();
- virtual bool unique_walk_function(void *elem);
-};
-
-
-/*
- Item_sum_sum_distinct - implementation of SUM(DISTINCT expr).
- See also: MySQL manual, chapter 'Adding New Functions To MySQL'
- and comments in item_sum.cc.
-*/
-
-class Item_sum_sum_distinct :public Item_sum_distinct
-{
-private:
- Item_sum_sum_distinct(THD *thd, Item_sum_sum_distinct *item)
- :Item_sum_distinct(thd, item) {}
-public:
- Item_sum_sum_distinct(Item *item_arg) :Item_sum_distinct(item_arg) {}
-
- enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
- const char *func_name() const { return "sum(distinct "; }
- Item *copy_or_same(THD* thd) { return new Item_sum_sum_distinct(thd, this); }
-};
-
-
-/* Item_sum_avg_distinct - SELECT AVG(DISTINCT expr) FROM ... */
-
-class Item_sum_avg_distinct: public Item_sum_distinct
-{
-private:
- Item_sum_avg_distinct(THD *thd, Item_sum_avg_distinct *original)
- :Item_sum_distinct(thd, original) {}
-public:
- uint prec_increment;
- Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
-
- void fix_length_and_dec();
- virtual void calculate_val_and_count();
- enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
- const char *func_name() const { return "avg(distinct "; }
- Item *copy_or_same(THD* thd) { return new Item_sum_avg_distinct(thd, this); }
-};
-
-
-class Item_sum_count :public Item_sum_int
-{
- longlong count;
+ void cleanup();
public:
Item_sum_count(Item *item_par)
:Item_sum_int(item_par),count(0)
{}
+
+ /**
+ Constructs an instance for COUNT(DISTINCT)
+
+ @param list a list of the arguments to the aggregate function
+
+ This constructor is called by the parser only for COUNT (DISTINCT).
+ */
+
+ Item_sum_count(List<Item> &list)
+ :Item_sum_int(list),count(0)
+ {
+ set_distinct(TRUE);
+ }
Item_sum_count(THD *thd, Item_sum_count *item)
:Item_sum_int(thd, item), count(item->count)
{}
- enum Sumfunctype sum_func () const { return COUNT_FUNC; }
- void clear();
+ enum Sumfunctype sum_func () const
+ {
+ return with_distinct ? COUNT_DISTINCT_FUNC : COUNT_FUNC;
+ }
void no_rows_in_result() { count=0; }
- bool add();
void make_const(longlong count_arg)
{
count=count_arg;
@@ -566,76 +759,12 @@ class Item_sum_count :public Item_sum_int
}
longlong val_int();
void reset_field();
- void cleanup();
void update_field();
- const char *func_name() const { return "count("; }
- Item *copy_or_same(THD* thd);
-};
-
-
-class TMP_TABLE_PARAM;
-
-class Item_sum_count_distinct :public Item_sum_int
-{
- TABLE *table;
- uint32 *field_lengths;
- TMP_TABLE_PARAM *tmp_table_param;
- bool force_copy_fields;
- /*
- If there are no blobs, we can use a tree, which
- is faster than heap table. In that case, we still use the table
- to help get things set up, but we insert nothing in it
- */
- Unique *tree;
- /*
- Storage for the value of count between calls to val_int() so val_int()
- will not recalculate on each call. Validitiy of the value is stored in
- is_evaluated.
- */
- longlong count;
- /*
- Following is 0 normal object and pointer to original one for copy
- (to correctly free resources)
- */
- Item_sum_count_distinct *original;
- uint tree_key_length;
-
-
- bool always_null; // Set to 1 if the result is always NULL
-
-
- friend int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
- friend int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2);
-
-public:
- Item_sum_count_distinct(List<Item> &list)
- :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
- force_copy_fields(0), tree(0), count(0),
- original(0), always_null(FALSE)
- { quick_group= 0; }
- Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
- :Item_sum_int(thd, item), table(item->table),
- field_lengths(item->field_lengths),
- tmp_table_param(item->tmp_table_param),
- force_copy_fields(0), tree(item->tree), count(item->count),
- original(item), tree_key_length(item->tree_key_length),
- always_null(item->always_null)
- {}
- ~Item_sum_count_distinct();
-
- void cleanup();
-
- enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
- void clear();
- bool add();
- longlong val_int();
- void reset_field() { return ;} // Never called
- void update_field() { return ; } // Never called
- const char *func_name() const { return "count(distinct "; }
- bool setup(THD *thd);
- void make_unique();
+ const char *func_name() const
+ {
+ return with_distinct ? "count(distinct " : "count(";
+ }
Item *copy_or_same(THD* thd);
- void no_rows_in_result() {}
};
@@ -674,13 +803,18 @@ public:
uint prec_increment;
uint f_precision, f_scale, dec_bin_size;
- Item_sum_avg(Item *item_par) :Item_sum_sum(item_par), count(0) {}
+ Item_sum_avg(Item *item_par, bool distinct= FALSE)
+ :Item_sum_sum(item_par, distinct), count(0)
+ {}
Item_sum_avg(THD *thd, Item_sum_avg *item)
:Item_sum_sum(thd, item), count(item->count),
prec_increment(item->prec_increment) {}
void fix_length_and_dec();
- enum Sumfunctype sum_func () const {return AVG_FUNC;}
+ enum Sumfunctype sum_func () const
+ {
+ return with_distinct ? AVG_DISTINCT_FUNC : AVG_FUNC;
+ }
void clear();
bool add();
double val_real();
@@ -693,7 +827,10 @@ public:
Item *result_item(Field *field)
{ return new Item_avg_field(hybrid_type, this); }
void no_rows_in_result() {}
- const char *func_name() const { return "avg("; }
+ const char *func_name() const
+ {
+ return with_distinct ? "avg(distinct " : "avg(";
+ }
Item *copy_or_same(THD* thd);
Field *create_tmp_field(bool group, TABLE *table, uint convert_blob_length);
void cleanup()
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 05575e2744b..52e8d5d61c2 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -708,7 +708,8 @@ static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
double read_time);
static
-TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree);
+TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
+ double read_time);
static double get_index_only_read_time(const PARAM* param, ha_rows records,
int keynr);
@@ -2049,7 +2050,7 @@ public:
class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
- bool have_min, have_max;
+ bool have_min, have_max, have_agg_distinct;
KEY_PART_INFO *min_max_arg_part;
uint group_prefix_len;
uint used_key_parts;
@@ -2061,11 +2062,13 @@ private:
SEL_TREE *range_tree; /* Represents all range predicates in the query. */
SEL_ARG *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */
uint param_idx; /* Index of used key in param->key. */
- /* Number of records selected by the ranges in index_tree. */
+ bool is_index_scan; /* Use index_next() instead of random read */
public:
+ /* Number of records selected by the ranges in index_tree. */
ha_rows quick_prefix_records;
public:
- TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
+ TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
+ bool have_agg_distinct_arg,
KEY_PART_INFO *min_max_arg_part_arg,
uint group_prefix_len_arg, uint used_key_parts_arg,
uint group_key_parts_arg, KEY *index_info_arg,
@@ -2074,11 +2077,12 @@ public:
SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
uint param_idx_arg, ha_rows quick_prefix_records_arg)
: have_min(have_min_arg), have_max(have_max_arg),
+ have_agg_distinct(have_agg_distinct_arg),
min_max_arg_part(min_max_arg_part_arg),
group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
group_key_parts(group_key_parts_arg), index_info(index_info_arg),
index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
- index_tree(index_tree_arg), param_idx(param_idx_arg),
+ index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
quick_prefix_records(quick_prefix_records_arg)
{
if (key_infix_len)
@@ -2088,6 +2092,7 @@ public:
QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
MEM_ROOT *parent_alloc);
+ void use_index_scan() { is_index_scan= TRUE; }
};
@@ -2349,7 +2354,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
Notice that it can be constructed no matter if there is a range tree.
*/
- group_trp= get_best_group_min_max(&param, tree);
+ group_trp= get_best_group_min_max(&param, tree, best_read_time);
if (group_trp)
{
param.table->quick_condition_rows= min(group_trp->records,
@@ -9048,15 +9053,10 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
double *read_cost, ha_rows *records);
-/*
+/**
Test if this access method is applicable to a GROUP query with MIN/MAX
functions, and if so, construct a new TRP object.
- SYNOPSIS
- get_best_group_min_max()
- param Parameter from test_quick_select
- sel_tree Range tree generated by get_mm_tree
-
DESCRIPTION
Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
@@ -9167,17 +9167,16 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
- Lift the limitation in condition (B3), that is, make this access method
applicable to ROLLUP queries.
- RETURN
- If mem_root != NULL
- - valid TRP_GROUP_MIN_MAX object if this QUICK class can be used for
- the query
- - NULL o/w.
- If mem_root == NULL
- - NULL
+ @param param Parameter from test_quick_select
+ @param sel_tree Range tree generated by get_mm_tree
+ @param read_time Best read time so far (=table/index scan time)
+ @return table read plan
+ @retval NULL Loose index scan not applicable or mem_root == NULL
+ @retval !NULL Loose index scan table read plan
*/
static TRP_GROUP_MIN_MAX *
-get_best_group_min_max(PARAM *param, SEL_TREE *tree)
+get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
{
THD *thd= param->thd;
JOIN *join= thd->lex->current_select->join;
@@ -9198,25 +9197,33 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
ORDER *tmp_group;
Item *item;
Item_field *item_field;
+ bool is_agg_distinct;
+ List<Item_field> agg_distinct_flds;
+
DBUG_ENTER("get_best_group_min_max");
/* Perform few 'cheap' tests whether this access method is applicable. */
if (!join)
DBUG_RETURN(NULL); /* This is not a select statement. */
if ((join->tables != 1) || /* The query must reference one table. */
- ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
- (!join->select_distinct)) ||
(join->select_lex->olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
DBUG_RETURN(NULL);
if (table->s->keys == 0) /* There are no indexes to use. */
DBUG_RETURN(NULL);
- /* Analyze the query in more detail. */
- List_iterator<Item> select_items_it(join->fields_list);
-
/* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
DBUG_RETURN(NULL);
+
+ List_iterator<Item> select_items_it(join->fields_list);
+ is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
+
+ if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
+ (!join->select_distinct) &&
+ !is_agg_distinct)
+ DBUG_RETURN(NULL);
+ /* Analyze the query in more detail. */
+
if (join->sum_funcs[0])
{
Item_sum *min_max_item;
@@ -9227,6 +9234,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
have_min= TRUE;
else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
have_max= TRUE;
+ else if (min_max_item->sum_func() == Item_sum::COUNT_DISTINCT_FUNC ||
+ min_max_item->sum_func() == Item_sum::SUM_DISTINCT_FUNC ||
+ min_max_item->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
+ continue;
else
DBUG_RETURN(NULL);
@@ -9243,13 +9254,12 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
DBUG_RETURN(NULL);
}
}
-
/* Check (SA5). */
if (join->select_distinct)
{
while ((item= select_items_it++))
{
- if (item->type() != Item::FIELD_ITEM)
+ if (item->real_item()->type() != Item::FIELD_ITEM)
DBUG_RETURN(NULL);
}
}
@@ -9257,7 +9267,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
/* Check (GA4) - that there are no expressions among the group attributes. */
for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
{
- if ((*tmp_group->item)->type() != Item::FIELD_ITEM)
+ if ((*tmp_group->item)->real_item()->type() != Item::FIELD_ITEM)
DBUG_RETURN(NULL);
}
@@ -9276,6 +9286,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
uint best_param_idx= 0;
const uint pk= param->table->s->primary_key;
+ uint max_key_part;
SEL_ARG *cur_index_tree= NULL;
ha_rows cur_quick_prefix_records= 0;
uint cur_param_idx=MAX_KEY;
@@ -9329,6 +9340,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
}
}
+ max_key_part= 0;
+ used_key_parts_map.clear_all();
/*
Check (GA1) for GROUP BY queries.
*/
@@ -9352,6 +9365,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
{
cur_group_prefix_len+= cur_part->store_length;
++cur_group_key_parts;
+ max_key_part= cur_part - cur_index_info->key_part + 1;
+ used_key_parts_map.set_bit(max_key_part);
}
else
goto next_index;
@@ -9365,14 +9380,26 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
Later group_fields_array of ORDER objects is used to convert the query
to a GROUP query.
*/
- else if (join->select_distinct)
+ if ((!join->group_list && join->select_distinct) ||
+ is_agg_distinct)
{
- select_items_it.rewind();
- used_key_parts_map.clear_all();
- uint max_key_part= 0;
- while ((item= select_items_it++))
+ if (!is_agg_distinct)
{
- item_field= (Item_field*) item; /* (SA5) already checked above. */
+ select_items_it.rewind();
+ }
+
+ List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
+ while (NULL != (item = (is_agg_distinct ?
+ (Item *) agg_distinct_flds_it++ : select_items_it++)))
+ {
+ /* (SA5) already checked above. */
+ item_field= (Item_field*) item->real_item();
+ DBUG_ASSERT(item->real_item()->type() == Item::FIELD_ITEM);
+
+ /* not doing loose index scan for derived tables */
+ if (!item_field->field)
+ goto next_index;
+
/* Find the order of the key part in the index. */
key_part_nr= get_field_keypart(cur_index_info, item_field->field);
/*
@@ -9381,7 +9408,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
*/
if (used_key_parts_map.is_set(key_part_nr))
continue;
- if (key_part_nr < 1 || key_part_nr > join->fields_list.elements)
+ if (key_part_nr < 1 ||
+ (!is_agg_distinct && key_part_nr > join->fields_list.elements))
goto next_index;
cur_part= cur_index_info->key_part + key_part_nr - 1;
cur_group_prefix_len+= cur_part->store_length;
@@ -9401,10 +9429,6 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
if (all_parts != cur_parts)
goto next_index;
}
- else
- {
- DBUG_ASSERT(FALSE);
- }
/* Check (SA2). */
if (min_max_arg_item)
@@ -9558,7 +9582,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
/* The query passes all tests, so construct a new TRP object. */
read_plan= new (param->mem_root)
- TRP_GROUP_MIN_MAX(have_min, have_max, min_max_arg_part,
+ TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
+ min_max_arg_part,
group_prefix_len, used_key_parts,
group_key_parts, index_info, index,
key_infix_len,
@@ -9572,6 +9597,11 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree)
read_plan->read_cost= best_read_cost;
read_plan->records= best_records;
+ if (read_time < best_read_cost && is_agg_distinct)
+ {
+ read_plan->read_cost= 0;
+ read_plan->use_index_scan();
+ }
DBUG_PRINT("info",
("Returning group min/max plan: cost: %g, records: %lu",
@@ -10077,11 +10107,12 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
param->thd->lex->current_select->join,
- have_min, have_max, min_max_arg_part,
+ have_min, have_max,
+ have_agg_distinct, min_max_arg_part,
group_prefix_len, group_key_parts,
used_key_parts, index_info, index,
read_cost, records, key_infix_len,
- key_infix, parent_alloc);
+ key_infix, parent_alloc, is_index_scan);
if (!quick)
DBUG_RETURN(NULL);
@@ -10161,6 +10192,9 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
key_infix_len Length of the key infix appended to the group prefix
key_infix Infix of constants from equality predicates
parent_alloc Memory pool for this and quick_prefix_select data
+ is_index_scan get the next different key not by jumping on it via
+ index read, but by scanning until the end of the
+ rows with equal key value.
RETURN
None
@@ -10168,20 +10202,22 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
- bool have_max_arg,
+ bool have_max_arg, bool have_agg_distinct_arg,
KEY_PART_INFO *min_max_arg_part_arg,
uint group_prefix_len_arg, uint group_key_parts_arg,
uint used_key_parts_arg, KEY *index_info_arg,
uint use_index, double read_cost_arg,
ha_rows records_arg, uint key_infix_len_arg,
- uchar *key_infix_arg, MEM_ROOT *parent_alloc)
+ uchar *key_infix_arg, MEM_ROOT *parent_alloc,
+ bool is_index_scan_arg)
:join(join_arg), index_info(index_info_arg),
group_prefix_len(group_prefix_len_arg),
group_key_parts(group_key_parts_arg), have_min(have_min_arg),
- have_max(have_max_arg), seen_first_key(FALSE),
- min_max_arg_part(min_max_arg_part_arg), key_infix(key_infix_arg),
- key_infix_len(key_infix_len_arg), min_functions_it(NULL),
- max_functions_it(NULL)
+ have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
+ seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
+ key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
+ min_functions_it(NULL), max_functions_it(NULL),
+ is_index_scan(is_index_scan_arg)
{
head= table;
file= head->file;
@@ -10744,6 +10780,56 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_max()
}
+/**
+ Find the next different key value by skiping all the rows with the same key
+ value.
+
+ Implements a specialized loose index access method for queries
+ containing aggregate functions with distinct of the form:
+ SELECT [SUM|COUNT|AVG](DISTINCT a,...) FROM t
+ This method comes to replace the index scan + Unique class
+ (distinct selection) for loose index scan that visits all the rows of a
+ covering index instead of jumping in the begining of each group.
+ TODO: Placeholder function. To be replaced by a handler API call
+
+ @param is_index_scan hint to use index scan instead of random index read
+ to find the next different value.
+ @param file table handler
+ @param key_part group key to compare
+ @param record row data
+ @param group_prefix current key prefix data
+ @param group_prefix_len length of the current key prefix data
+ @param group_key_parts number of the current key prefix columns
+ @return status
+ @retval 0 success
+ @retval !0 failure
+*/
+
+static int index_next_different (bool is_index_scan, handler *file,
+ KEY_PART_INFO *key_part, uchar * record,
+ const uchar * group_prefix,
+ uint group_prefix_len,
+ uint group_key_parts)
+{
+ if (is_index_scan)
+ {
+ int result= 0;
+
+ while (!key_cmp (key_part, group_prefix, group_prefix_len))
+ {
+ result= file->index_next(record);
+ if (result)
+ return(result);
+ }
+ return result;
+ }
+ else
+ return file->index_read_map(record, group_prefix,
+ make_prev_keypart_map(group_key_parts),
+ HA_READ_AFTER_KEY);
+}
+
+
/*
Determine the prefix of the next group.
@@ -10790,9 +10876,9 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
else
{
/* Load the first key in this group into record. */
- result= file->index_read_map(record, group_prefix,
- make_prev_keypart_map(group_key_parts),
- HA_READ_AFTER_KEY);
+ result= index_next_different (is_index_scan, file, index_info->key_part,
+ record, group_prefix, group_prefix_len,
+ group_key_parts);
if (result)
DBUG_RETURN(result);
}
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 8d2ba1bb0a6..393ffcb2115 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -616,6 +616,7 @@ private:
uchar *last_prefix; /* Prefix of the last group for detecting EOF. */
bool have_min; /* Specify whether we are computing */
bool have_max; /* a MIN, a MAX, or both. */
+ bool have_agg_distinct;/* aggregate_function(DISTINCT ...). */
bool seen_first_key; /* Denotes whether the first key was retrieved.*/
KEY_PART_INFO *min_max_arg_part; /* The keypart of the only argument field */
/* of all MIN/MAX functions. */
@@ -629,6 +630,11 @@ private:
List<Item_sum> *max_functions;
List_iterator<Item_sum> *min_functions_it;
List_iterator<Item_sum> *max_functions_it;
+ /*
+ Use index scan to get the next different key instead of jumping into it
+ through index read
+ */
+ bool is_index_scan;
public:
/*
The following two members are public to allow easy access from
@@ -646,12 +652,13 @@ private:
void update_max_result();
public:
QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join, bool have_min,
- bool have_max, KEY_PART_INFO *min_max_arg_part,
+ bool have_max, bool have_agg_distinct,
+ KEY_PART_INFO *min_max_arg_part,
uint group_prefix_len, uint group_key_parts,
uint used_key_parts, KEY *index_info, uint
use_index, double read_cost, ha_rows records, uint
key_infix_len, uchar *key_infix, MEM_ROOT
- *parent_alloc);
+ *parent_alloc, bool is_index_scan);
~QUICK_GROUP_MIN_MAX_SELECT();
bool add_range(SEL_ARG *sel_range);
void update_key_stat();
@@ -667,6 +674,12 @@ public:
#ifndef DBUG_OFF
void dbug_dump(int indent, bool verbose);
#endif
+ bool is_agg_distinct() { return have_agg_distinct; }
+ virtual void append_loose_scan_type(String *str)
+ {
+ if (is_index_scan)
+ str->append(STRING_WITH_LEN(" (scanning)"));
+ }
};
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index 8e7265ba1ad..d995faf1ab6 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -355,10 +355,13 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
const_result= 0;
break;
}
+ item_sum->set_aggregator (item_sum->with_distinct ?
+ Aggregator::DISTINCT_AGGREGATOR :
+ Aggregator::SIMPLE_AGGREGATOR);
if (!count)
{
/* If count == 0, then we know that is_exact_count == TRUE. */
- ((Item_sum_min*) item_sum)->clear(); /* Set to NULL. */
+ ((Item_sum_min*) item_sum)->aggregator_clear(); /* Set to NULL. */
}
else
((Item_sum_min*) item_sum)->reset(); /* Set to the constant value. */
@@ -443,10 +446,13 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
const_result= 0;
break;
}
+ item_sum->set_aggregator (item_sum->with_distinct ?
+ Aggregator::DISTINCT_AGGREGATOR :
+ Aggregator::SIMPLE_AGGREGATOR);
if (!count)
{
/* If count != 1, then we know that is_exact_count == TRUE. */
- ((Item_sum_max*) item_sum)->clear(); /* Set to NULL. */
+ ((Item_sum_max*) item_sum)->aggregator_clear(); /* Set to NULL. */
}
else
((Item_sum_max*) item_sum)->reset(); /* Set to the constant value. */
diff --git a/sql/sql_class.h b/sql/sql_class.h
index e845b5a727c..4874801e380 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -54,6 +54,7 @@
class Reprepare_observer
{
public:
+ Reprepare_observer() {}
/**
Check if a change of metadata is OK. In future
the signature of this method may be extended to accept the old
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index db3a73aec74..3e2d85e4951 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -222,6 +222,7 @@ static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
+static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct);
static bool init_sum_functions(Item_sum **func, Item_sum **end);
static bool update_sum_func(Item_sum **func);
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
@@ -1232,7 +1233,11 @@ JOIN::optimize()
if (test_if_subpart(group_list, order) ||
(!group_list && tmp_table_param.sum_func_count))
+ {
order=0;
+ if (is_indexed_agg_distinct(this, NULL))
+ sort_and_group= 0;
+ }
// Can't use sort on head table if using row cache
if (full_join)
@@ -1410,8 +1415,16 @@ JOIN::optimize()
single table queries, thus it is sufficient to test only the first
join_tab element of the plan for its access method.
*/
+ bool need_distinct= TRUE;
if (join_tab->is_using_loose_index_scan())
+ {
tmp_table_param.precomputed_group_by= TRUE;
+ if (join_tab->is_using_agg_loose_index_scan())
+ {
+ need_distinct= FALSE;
+ tmp_table_param.precomputed_group_by= FALSE;
+ }
+ }
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
@@ -1472,6 +1485,7 @@ JOIN::optimize()
HA_POS_ERROR, HA_POS_ERROR, FALSE) ||
alloc_group_fields(this, group_list) ||
make_sum_func_list(all_fields, fields_list, 1) ||
+ prepare_sum_aggregators(sum_funcs, need_distinct) ||
setup_sum_funcs(thd, sum_funcs))
{
DBUG_RETURN(1);
@@ -1481,6 +1495,7 @@ JOIN::optimize()
else
{
if (make_sum_func_list(all_fields, fields_list, 0) ||
+ prepare_sum_aggregators(sum_funcs, need_distinct) ||
setup_sum_funcs(thd, sum_funcs))
{
DBUG_RETURN(1);
@@ -1953,7 +1968,9 @@ JOIN::exec()
}
}
if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list,
- 1, TRUE))
+ 1, TRUE) ||
+ prepare_sum_aggregators(curr_join->sum_funcs,
+ !curr_join->join_tab->is_using_agg_loose_index_scan()))
DBUG_VOID_RETURN;
curr_join->group_list= 0;
if (!curr_join->sort_and_group &&
@@ -2056,6 +2073,8 @@ JOIN::exec()
if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list,
1, TRUE) ||
+ prepare_sum_aggregators (curr_join->sum_funcs, !curr_join->join_tab ||
+ !curr_join->join_tab->is_using_agg_loose_index_scan()) ||
setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
thd->is_fatal_error)
DBUG_VOID_RETURN;
@@ -3938,6 +3957,82 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
/**
+ Check for the presence of AGGFN(DISTINCT a) queries that may be subject
+ to loose index scan.
+
+
+ Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
+ (QUICK_GROUP_MIN_MAX_SELECT).
+ Optionally (if out_args is supplied) will push the arguments of
+ AGGFN(DISTINCT) to the list
+
+ @param join the join to check
+ @param[out] out_args list of aggregate function arguments
+ @return does the query qualify for indexed AGGFN(DISTINCT)
+ @retval true it does
+ @retval false AGGFN(DISTINCT) must apply distinct in it.
+*/
+
+bool
+is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
+{
+ Item_sum **sum_item_ptr;
+ bool result= false;
+
+ if (join->tables != 1 || /* reference more than 1 table */
+ join->select_distinct || /* or a DISTINCT */
+ join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
+ return false;
+
+ if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
+ return false;
+
+ for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
+ {
+ Item_sum *sum_item= *sum_item_ptr;
+ Item *expr;
+ /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
+ switch (sum_item->sum_func())
+ {
+ case Item_sum::MIN_FUNC:
+ case Item_sum::MAX_FUNC:
+ continue;
+ case Item_sum::COUNT_DISTINCT_FUNC:
+ break;
+ case Item_sum::AVG_DISTINCT_FUNC:
+ case Item_sum::SUM_DISTINCT_FUNC:
+ if (sum_item->get_arg_count() == 1)
+ break;
+ /* fall through */
+ default: return false;
+ }
+ /*
+ We arrive here for every COUNT(DISTINCT),AVG(DISTINCT) or SUM(DISTINCT).
+ Collect the arguments of the aggregate functions to a list.
+ We don't worry about duplicates as these will be sorted out later in
+ get_best_group_min_max
+ */
+ for (uint i= 0; i < sum_item->get_arg_count(); i++)
+ {
+ expr= sum_item->get_arg(i);
+ /* The AGGFN(DISTINCT) arg is not an attribute? */
+ if (expr->real_item()->type() != Item::FIELD_ITEM)
+ return false;
+
+ /*
+ If we came to this point the AGGFN(DISTINCT) loose index scan
+ optimization is applicable
+ */
+ if (out_args)
+ out_args->push_back((Item_field *) expr);
+ result= true;
+ }
+ }
+ return result;
+}
+
+
+/**
Discover the indexes that can be used for GROUP BY or DISTINCT queries.
If the query has a GROUP BY clause, find all indexes that contain all
@@ -3979,6 +4074,10 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
item->walk(&Item::collect_item_field_processor, 0,
(uchar*) &indexed_fields);
}
+ else if (is_indexed_agg_distinct(join, &indexed_fields))
+ {
+ join->sort_and_group= 1;
+ }
else
return;
@@ -10377,6 +10476,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list)
bzero(share, sizeof(*share));
table->field= field;
table->s= share;
+ table->temp_pool_slot= MY_BIT_NONE;
share->blob_field= blob_field;
share->fields= field_count;
share->blob_ptr_size= portable_sizeof_char_ptr;
@@ -14532,7 +14632,7 @@ setup_new_fields(THD *thd, List<Item> &fields,
optimize away 'order by'.
*/
-static ORDER *
+ORDER *
create_distinct_group(THD *thd, Item **ref_pointer_array,
ORDER *order_list, List<Item> &fields,
List<Item> &all_fields,
@@ -15334,7 +15434,22 @@ static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
DBUG_ENTER("setup_sum_funcs");
while ((func= *(func_ptr++)))
{
- if (func->setup(thd))
+ if (func->aggregator_setup(thd))
+ DBUG_RETURN(TRUE);
+ }
+ DBUG_RETURN(FALSE);
+}
+
+
+static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct)
+{
+ Item_sum *func;
+ DBUG_ENTER("setup_sum_funcs");
+ while ((func= *(func_ptr++)))
+ {
+ if (func->set_aggregator(need_distinct && func->with_distinct ?
+ Aggregator::DISTINCT_AGGREGATOR :
+ Aggregator::SIMPLE_AGGREGATOR))
DBUG_RETURN(TRUE);
}
DBUG_RETURN(FALSE);
@@ -15384,7 +15499,7 @@ init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr)
/* If rollup, calculate the upper sum levels */
for ( ; *func_ptr ; func_ptr++)
{
- if ((*func_ptr)->add())
+ if ((*func_ptr)->aggregator_add())
return 1;
}
return 0;
@@ -15396,7 +15511,7 @@ update_sum_func(Item_sum **func_ptr)
{
Item_sum *func;
for (; (func= (Item_sum*) *func_ptr) ; func_ptr++)
- if (func->add())
+ if (func->aggregator_add())
return 1;
return 0;
}
@@ -16313,7 +16428,12 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
if (key_read)
{
if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
+ {
+ QUICK_GROUP_MIN_MAX_SELECT *qgs=
+ (QUICK_GROUP_MIN_MAX_SELECT *) tab->select->quick;
extra.append(STRING_WITH_LEN("; Using index for group-by"));
+ qgs->append_loose_scan_type(&extra);
+ }
else
extra.append(STRING_WITH_LEN("; Using index"));
}
diff --git a/sql/sql_select.h b/sql/sql_select.h
index a0366d47149..bb751c600ed 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -218,6 +218,11 @@ typedef struct st_join_table {
(select->quick->get_type() ==
QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX));
}
+ bool is_using_agg_loose_index_scan ()
+ {
+ return (is_using_loose_index_scan() &&
+ ((QUICK_GROUP_MIN_MAX_SELECT *)select->quick)->is_agg_distinct());
+ }
} JOIN_TAB;
enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
@@ -564,6 +569,8 @@ Field* create_tmp_field_from_field(THD *thd, Field* org_field,
const char *name, TABLE *table,
Item_field *item, uint convert_blob_length);
+bool is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args);
+
/* functions from opt_sum.cc */
bool simple_pred(Item_func *func_item, Item **args, bool *inv_order);
int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds);
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 329158b7750..e7193d76d4e 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -8128,7 +8128,7 @@ sum_expr:
}
| AVG_SYM '(' DISTINCT in_sum_expr ')'
{
- $$= new (YYTHD->mem_root) Item_sum_avg_distinct($4);
+ $$= new (YYTHD->mem_root) Item_sum_avg($4, TRUE);
if ($$ == NULL)
MYSQL_YYABORT;
}
@@ -8171,7 +8171,7 @@ sum_expr:
{ Select->in_sum_expr--; }
')'
{
- $$= new (YYTHD->mem_root) Item_sum_count_distinct(* $5);
+ $$= new (YYTHD->mem_root) Item_sum_count(* $5);
if ($$ == NULL)
MYSQL_YYABORT;
}
@@ -8236,7 +8236,7 @@ sum_expr:
}
| SUM_SYM '(' DISTINCT in_sum_expr ')'
{
- $$= new (YYTHD->mem_root) Item_sum_sum_distinct($4);
+ $$= new (YYTHD->mem_root) Item_sum_sum($4, TRUE);
if ($$ == NULL)
MYSQL_YYABORT;
}