Diffstat (limited to 'sql')
-rw-r--r--  sql/field.h            27
-rw-r--r--  sql/filesort.cc       195
-rw-r--r--  sql/item_jsonfunc.cc   82
-rw-r--r--  sql/item_jsonfunc.h     1
-rw-r--r--  sql/item_sum.cc       373
-rw-r--r--  sql/item_sum.h         15
-rw-r--r--  sql/opt_range.cc        4
-rw-r--r--  sql/sql_class.h         1
-rw-r--r--  sql/sql_delete.cc       6
-rw-r--r--  sql/sql_sort.h         17
-rw-r--r--  sql/sql_statistics.cc 114
-rw-r--r--  sql/uniques.cc        179
-rw-r--r--  sql/uniques.h         107
13 files changed, 1005 insertions, 116 deletions
diff --git a/sql/field.h b/sql/field.h
index e4c5ffcc0de..693d0b63ded 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1109,6 +1109,11 @@ public:
*/
virtual uint32 data_length() { return pack_length(); }
virtual uint32 sort_length() const { return pack_length(); }
+ /*
+ returns the sort length for the field, excluding the suffix length bytes
+ added for fields with a binary charset.
+ */
+ virtual uint32 sort_length_without_suffix() const { return pack_length(); }
/*
sort_suffix_length() return the length bytes needed to store the length
@@ -1316,9 +1321,21 @@ public:
}
return update_fl;
}
+
+ /*
+ @brief
+ Pack the field's current value into the given buffer
+ @param
+ to buffer to store the packed value
+ */
+ virtual uint make_packed_record_field(uchar *to)
+ {
+ uchar* end= pack(to, ptr);
+ return static_cast<uint>(end - to);
+ }
virtual void store_field_value(uchar *val, uint len)
{
- memcpy(ptr, val, len);
+ memcpy(ptr, val, len);
}
virtual uint decimals() const { return 0; }
virtual Information_schema_numeric_attributes
@@ -4122,6 +4139,10 @@ public:
{
return (uint32) field_length + sort_suffix_length();
}
+ uint32 sort_length_without_suffix() const override
+ {
+ return (uint32) field_length;
+ }
virtual uint32 sort_suffix_length() const override
{
return (field_charset() == &my_charset_bin ? length_bytes : 0);
@@ -4463,6 +4484,10 @@ public:
{ return (uint32) (packlength); }
uint row_pack_length() const override { return pack_length_no_ptr(); }
uint32 sort_length() const override;
+ uint32 sort_length_without_suffix() const override
+ {
+ return field_length;
+ }
uint32 sort_suffix_length() const override;
uint32 value_length() override { return get_length(); }
virtual uint32 max_data_length() const override
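Editor's note: the field.h hunks above add two hooks the packed Unique code relies on: sort_length_without_suffix(), which drops the suffix length bytes that binary-charset VARCHAR/BLOB columns add to their sort length, and make_packed_record_field(), which packs the current value and reports how many bytes it wrote. A minimal, self-contained sketch of the intended semantics (not the real Field hierarchy; the struct name and the one-byte length prefix are assumptions for this example only):

#include <cstdint>
#include <cstring>
#include <cstdio>

struct VarcharFieldSketch
{
  uint32_t field_length;   // declared maximum data length of the column
  uint32_t length_bytes;   // bytes used to store the actual value length
  const char *value;       // current value (stands in for Field::ptr)

  // binary charset: the suffix length bytes are part of the sort image
  uint32_t sort_suffix_length() const { return length_bytes; }
  uint32_t sort_length() const { return field_length + sort_suffix_length(); }
  uint32_t sort_length_without_suffix() const { return field_length; }

  // pack(): length prefix followed by the actual bytes, like a VARCHAR
  uint32_t make_packed_record_field(unsigned char *to) const
  {
    uint32_t len= (uint32_t) strlen(value);
    to[0]= (unsigned char) len;           // assumes length_bytes == 1
    memcpy(to + 1, value, len);
    return 1 + len;                       // bytes written into the packed record
  }
};

int main()
{
  VarcharFieldSketch f= { 100, 1, "abc" };
  unsigned char buf[128];
  printf("sort_length=%u without_suffix=%u packed_bytes=%u\n",
         f.sort_length(), f.sort_length_without_suffix(),
         f.make_packed_record_field(buf));   // prints 101, 100 and 4
  return 0;
}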
diff --git a/sql/filesort.cc b/sql/filesort.cc
index ac43c96b0e0..e0389bb0343 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -35,6 +35,7 @@
#include "filesort_utils.h"
#include "sql_select.h"
#include "debug_sync.h"
+#include "uniques.h"
/* functions defined in this file */
@@ -1694,6 +1695,8 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
uint size_of_sort_length= param->using_packed_sortkeys() ?
Sort_keys::size_of_length_field : 0;
+ uint size_of_dupl_count= param->min_dupl_count ?
+ sizeof(element_count) : 0;
for (; ix < count; ++ix)
{
@@ -1709,14 +1712,16 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
buffpek->buffer_end())
break; // Incomplete record.
- uchar *plen= record + sort_length;
+ uchar *plen= record + sort_length + size_of_dupl_count;
+
uint res_length= param->get_result_length(plen);
if (plen + res_length > buffpek->buffer_end())
break; // Incomplete record.
- DBUG_ASSERT(res_length > 0);
+ DBUG_ASSERT(!param->sort_keys || res_length > 0);
DBUG_ASSERT(sort_length + res_length <= param->rec_length);
record+= sort_length;
record+= res_length;
+ record+= size_of_dupl_count;
}
DBUG_ASSERT(ix > 0);
count= ix;
@@ -1812,12 +1817,12 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
rec_length= param->rec_length;
res_length= param->res_length;
sort_length= param->sort_length;
- uint dupl_count_ofs= rec_length-sizeof(element_count);
uint min_dupl_count= param->min_dupl_count;
+ uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0;
+
bool check_dupl_count= flag && min_dupl_count;
offset= (rec_length-
(flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length);
- uint wr_len= flag ? res_length : rec_length;
uint wr_offset= flag ? offset : 0;
const bool using_packed_sortkeys= param->using_packed_sortkeys();
@@ -1867,10 +1872,16 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
Store it also in 'to_file'.
*/
buffpek= (Merge_chunk*) queue_top(&queue);
+ rec_length= param->get_record_length_for_unique(buffpek->current_key(),
+ size_of_dupl_count);
+
+ DBUG_ASSERT(rec_length <= param->sort_length);
+
memcpy(unique_buff, buffpek->current_key(), rec_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
if (min_dupl_count)
- memcpy(&dupl_count, unique_buff+dupl_count_ofs,
- sizeof(dupl_count));
+ memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count));
+
buffpek->advance_current_key(rec_length);
buffpek->decrement_mem_count();
if (buffpek->mem_count() == 0)
@@ -1900,28 +1911,31 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
src= buffpek->current_key();
if (cmp) // Remove duplicates
{
- uchar *current_key= buffpek->current_key();
- if (!(*cmp)(first_cmp_arg, &unique_buff, &current_key))
+ if (!(*cmp)(first_cmp_arg, &unique_buff, &src))
{
if (min_dupl_count)
{
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
element_count cnt;
memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
dupl_count+= cnt;
}
goto skip_duplicate;
}
+
if (min_dupl_count)
{
- memcpy(unique_buff+dupl_count_ofs, &dupl_count,
- sizeof(dupl_count));
+ DBUG_ASSERT(rec_length <= param->sort_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count));
}
+ res_length= rec_length - size_of_dupl_count;
src= unique_buff;
}
+ else
+ param->get_rec_and_res_len(src, &rec_length, &res_length);
{
- param->get_rec_and_res_len(buffpek->current_key(),
- &rec_length, &res_length);
const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
/*
@@ -1943,10 +1957,15 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
}
if (cmp)
{
+ rec_length= param->get_record_length_for_unique(buffpek->current_key(),
+ size_of_dupl_count);
+ DBUG_ASSERT(rec_length <= param->sort_length);
memcpy(unique_buff, buffpek->current_key(), rec_length);
if (min_dupl_count)
- memcpy(&dupl_count, unique_buff+dupl_count_ofs,
- sizeof(dupl_count));
+ {
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count));
+ }
}
if (!--max_rows)
{
@@ -1989,6 +2008,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
{
if (min_dupl_count)
{
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
element_count cnt;
memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt));
dupl_count+= cnt;
@@ -1998,13 +2018,22 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
}
if (min_dupl_count)
- memcpy(unique_buff+dupl_count_ofs, &dupl_count,
- sizeof(dupl_count));
+ {
+ DBUG_ASSERT(rec_length <= param->sort_length);
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count));
+ }
if (!check_dupl_count || dupl_count >= min_dupl_count)
{
src= unique_buff;
- if (my_b_write(to_file, src+wr_offset, wr_len))
+ res_length = rec_length - size_of_dupl_count;
+ const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
+ if (my_b_write(to_file,
+ src + (offset_for_packing ?
+ rec_length - res_length : // sort length
+ wr_offset),
+ bytes_to_write))
goto err; /* purecov: inspected */
if (!--max_rows)
goto end;
@@ -2022,17 +2051,28 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file,
for (uint ix= 0; ix < buffpek->mem_count(); ++ix)
{
uchar *src= buffpek->current_key();
- param->get_rec_and_res_len(src,
- &rec_length, &res_length);
- const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
- if (check_dupl_count)
+ if (cmp)
{
- memcpy((uchar *) &dupl_count,
- buffpek->current_key() + offset + dupl_count_ofs,
- sizeof(dupl_count));
- if (dupl_count < min_dupl_count)
- continue;
+ rec_length= param->get_record_length_for_unique(src,
+ size_of_dupl_count);
+ res_length= rec_length - size_of_dupl_count;
+ if (check_dupl_count)
+ {
+ /*
+ TODO varun: this looks incorrect to me
+ */
+ uint dupl_count_ofs= rec_length - sizeof(element_count);
+ memcpy(&dupl_count, src + dupl_count_ofs, sizeof(dupl_count));
+
+ if (dupl_count < min_dupl_count)
+ continue;
+ }
}
+ else
+ param->get_rec_and_res_len(src, &rec_length, &res_length);
+
+ const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
+
if(my_b_write(to_file,
src + (offset_for_packing ?
rec_length - res_length : // sort length
@@ -2532,6 +2572,23 @@ void Sort_param::try_to_pack_sortkeys()
rec_length= sort_length + addon_length;
}
+/*
+ @brief
+ Return the length of the record in the Unique tree
+
+ @param
+ to key value
+ size_of_dupl_count if min_dupl_count > 0, the record length includes
+ size_of_dupl_count bytes for the duplicate counter
+*/
+uint32 Sort_param::get_record_length_for_unique(uchar *to,
+ uint size_of_dupl_count)
+{
+ return using_packed_sortkeys() ?
+ Unique::read_packed_length(to) + size_of_dupl_count :
+ rec_length;
+}
+
uint
Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item,
@@ -2742,6 +2799,45 @@ bool SORT_FIELD_ATTR::check_if_packing_possible(THD *thd) const
/*
+ @brief
+ Setup the SORT_FIELD structure
+
+ @param
+ fld field structure
+ exclude_nulls TRUE if NULL values are not to be considered
+ with_suffix TRUE if the length bytes used to store the value length
+ (for a binary charset) should be included
+
+ @note
+ Currently used only by the Unique object.
+ TODO varun: we can refactor the code for filesort to use this function.
+
+*/
+void SORT_FIELD::setup(Field *fld, bool exclude_nulls, bool with_suffix)
+{
+ field= fld;
+ item= NULL;
+ /*
+ For Unique this always needs to be FALSE; alternatively, the reverse
+ flag could be passed as an argument to this function
+ */
+ reverse= false;
+ original_length= length= (with_suffix ?
+ field->sort_length() :
+ field->sort_length_without_suffix());
+
+ cs= field->sort_charset();
+ suffix_length= with_suffix ? field->sort_suffix_length() : 0;
+ type= field->is_packable() ?
+ SORT_FIELD_ATTR::VARIABLE_SIZE :
+ SORT_FIELD_ATTR::FIXED_SIZE;
+ maybe_null= exclude_nulls ? false : field->maybe_null();
+ length_bytes= is_variable_sized() ?
+ number_storage_requirement(length) : 0;
+}
+
+
+/*
Compare function used for packing sort keys
*/
@@ -2871,16 +2967,45 @@ int compare_packed_sort_keys(void *sort_param,
unsigned char **a_ptr, unsigned char **b_ptr)
{
int retval= 0;
- size_t a_len, b_len;
Sort_param *param= (Sort_param*)sort_param;
Sort_keys *sort_keys= param->sort_keys;
uchar *a= *a_ptr;
uchar *b= *b_ptr;
- a+= Sort_keys::size_of_length_field;
- b+= Sort_keys::size_of_length_field;
- for (SORT_FIELD *sort_field= sort_keys->begin();
- sort_field != sort_keys->end(); sort_field++)
+ if ((retval= sort_keys->compare_keys(a + Sort_keys::size_of_length_field,
+ b + Sort_keys::size_of_length_field)))
+ return retval;
+
+ /*
+ this comparison is done for the case when the sort key is appended with
+ the ROW_ID pointer. In that case there are no addon fields,
+ so a memcmp over both sort keys is sufficient
+ */
+ if (!param->using_addon_fields())
+ {
+ a+= Sort_keys::read_sortkey_length(a);
+ b+= Sort_keys::read_sortkey_length(b);
+ retval= memcmp(a, b, param->res_length);
+ }
+ return retval;
+}
+
+
+/*
+ @brief
+ Compare two sort keys
+
+ @retval
+ >0 key a greater than b
+ =0 key a equal to b
+ <0 key a less than b
+*/
+
+int Sort_keys::compare_keys(uchar *a, uchar *b)
+{
+ int retval= 0;
+ size_t a_len, b_len;
+ for (SORT_FIELD *sort_field= begin(); sort_field != end(); sort_field++)
{
retval= sort_field->is_variable_sized() ?
sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) :
@@ -2891,15 +3016,7 @@ int compare_packed_sort_keys(void *sort_param,
a+= a_len;
b+= b_len;
-
}
- /*
- this comparison is done for the case when the sort keys is appended with
- the ROW_ID pointer. For such cases we don't have addon fields
- so we can make a memcmp check over both the sort keys
- */
- if (!param->using_addon_fields())
- retval= memcmp(a, b, param->res_length);
return retval;
}
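Editor's note: with packed keys, read_to_buffer() and merge_buffers() above can no longer rely on a fixed rec_length; each key read from a chunk starts with its packed length, and a duplicate counter may follow when min_dupl_count is set. A standalone sketch of the per-record length computation that Sort_param::get_record_length_for_unique() performs, assuming the 4-byte little-endian length prefix written by Unique::store_packed_length() (int4store) and a 4-byte stand-in for element_count:

#include <cstdint>
#include <cstring>
#include <cstdio>

typedef uint32_t element_count_t;              // stand-in for element_count
static const uint32_t size_of_length_field= 4; // Unique::size_of_length_field

// Unique::read_packed_length(): length bytes plus the stored payload length.
static uint32_t read_packed_length(const unsigned char *p)
{
  uint32_t payload;
  memcpy(&payload, p, 4);                      // little-endian host assumed
  return size_of_length_field + payload;
}

// Record length as the merge code needs it: the whole packed key plus the
// optional duplicate counter appended when min_dupl_count > 0.
static uint32_t record_length_for_unique(const unsigned char *key,
                                         bool with_dupl_count)
{
  return read_packed_length(key) +
         (with_dupl_count ? (uint32_t) sizeof(element_count_t) : 0);
}

int main()
{
  unsigned char key[64];
  uint32_t payload= 7;                         // 7 bytes of packed field data
  memcpy(key, &payload, 4);
  printf("%u\n", record_length_for_unique(key, true)); // 4 + 7 + 4 = 15
  return 0;
}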
diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc
index b9a84775311..48075106206 100644
--- a/sql/item_jsonfunc.cc
+++ b/sql/item_jsonfunc.cc
@@ -1452,7 +1452,7 @@ append_null:
}
-static int append_json_value_from_field(String *str,
+static bool append_json_value_from_field(String *str,
Item *i, Field *f, const uchar *key, size_t offset, String *tmp_val)
{
if (i->type_handler()->is_bool_type())
@@ -1498,6 +1498,73 @@ append_null:
}
+/*
+ @brief
+ Append the value of a field in JSON format
+
+ @param
+ str buffer to write the value
+ item JSON_ARRAYAGG item
+ field field whose value needs to be appended
+ tmp_val temp buffer
+
+ @note
+ Use this function when the field already holds its value in Field::ptr.
+ This is currently used when packing is done for the JSON_ARRAYAGG function:
+ in that case the packed value has to be unpacked into Field::ptr first.
+
+ @retval
+ FALSE value appended in JSON format
+ TRUE error
+*/
+
+static bool append_json_value_from_field(String *str, Item *item, Field *field,
+ String *tmp_val)
+{
+ if (item->type_handler()->is_bool_type())
+ {
+ if (field->is_null())
+ goto append_null;
+
+ longlong v_int= field->val_int();
+ const char *t_f;
+ int t_f_len;
+
+ if (v_int)
+ {
+ t_f= "true";
+ t_f_len= 4;
+ }
+ else
+ {
+ t_f= "false";
+ t_f_len= 5;
+ }
+
+ return str->append(t_f, t_f_len);
+ }
+ {
+ if (field->is_null())
+ goto append_null;
+ String *sv= field->val_str(tmp_val);
+
+ if (item->is_json_type())
+ return str->append(sv->ptr(), sv->length());
+
+ if (item->result_type() == STRING_RESULT)
+ {
+ return str->append("\"", 1) ||
+ st_append_escaped(str, sv) ||
+ str->append("\"", 1);
+ }
+ return st_append_escaped(str, sv);
+ }
+
+append_null:
+ return str->append("null", 4);
+}
+
+
static int append_json_keyname(String *str, Item *item, String *tmp_val)
{
String *sv= item->val_str(tmp_val);
@@ -3689,6 +3756,19 @@ String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f,
}
+String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f,
+ String *tmp)
+{
+ m_tmp_json.length(0);
+
+ if (append_json_value_from_field(&m_tmp_json, i, f, tmp))
+ return NULL;
+
+ return &m_tmp_json;
+
+}
+
+
void Item_func_json_arrayagg::cut_max_length(String *result,
uint old_length, uint max_length) const
{
diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h
index 0408b3ad72f..16f700724bb 100644
--- a/sql/item_jsonfunc.h
+++ b/sql/item_jsonfunc.h
@@ -547,6 +547,7 @@ protected:
String *get_str_from_item(Item *i, String *tmp);
String *get_str_from_field(Item *i, Field *f, String *tmp,
const uchar *key, size_t offset);
+ String *get_str_from_field(Item *i, Field *f, String *tmp);
void cut_max_length(String *result,
uint old_length, uint max_length) const;
public:
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index e79344507b2..96b17e74019 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -701,6 +701,32 @@ int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2)
}
+/**
+ Correctly compare composite keys.
+
+ Used by the Unique class to compare packed keys. Will do correct comparisons
+ for composite keys with various field types.
+
+ @param arg Pointer to the relevant Aggregator_distinct instance
+ @param key1 left key image
+ @param key2 right key image
+
+ @return comparison result
+ @retval <0 if key1 < key2
+ @retval =0 if key1 = key2
+ @retval >0 if key1 > key2
+*/
+
+int
+Aggregator_distinct::composite_packed_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
+{
+ Aggregator_distinct *aggr= (Aggregator_distinct *) arg;
+ DBUG_ASSERT(aggr->tree);
+ return aggr->tree->compare_packed_keys(key1, key2);
+}
+
+
/***************************************************************************/
C_MODE_START
@@ -766,13 +792,19 @@ bool Aggregator_distinct::setup(THD *thd)
return TRUE;
/* Create a table with an unique key over all parameters */
+ uint non_const_items= 0;
for (uint i=0; i < item_sum->get_arg_count() ; i++)
{
Item *item=item_sum->get_arg(i);
if (list.push_back(item, thd->mem_root))
return TRUE; // End of memory
- if (item->const_item() && item->is_null())
- always_null= true;
+ if (item->const_item())
+ {
+ if (item->is_null())
+ always_null= true;
+ }
+ else
+ non_const_items++;
}
if (always_null)
return FALSE;
@@ -861,16 +893,27 @@ bool Aggregator_distinct::setup(THD *thd)
}
}
}
+
+ bool allow_packing= item_sum->is_packing_allowed(table, &tree_key_length);
+
+ if (allow_packing)
+ {
+ compare_key= (qsort_cmp2) composite_packed_key_cmp;
+ cmp_arg= (void*)this;
+ }
+
DBUG_ASSERT(tree == 0);
tree= new Unique(compare_key, cmp_arg, tree_key_length,
- item_sum->ram_limitation(thd));
+ item_sum->ram_limitation(thd), 0, allow_packing);
/*
The only time tree_key_length could be 0 is if someone does
count(distinct) on a char(0) field - stupid thing to do,
but this has to be handled - otherwise someone can crash
the server with a DoS attack
*/
- if (! tree)
+ if (!tree || tree->setup(thd, item_sum,
+ non_const_items, item_sum->get_arg_count(),
+ TRUE))
return TRUE;
}
return FALSE;
@@ -987,9 +1030,23 @@ bool Aggregator_distinct::add()
if (copy_funcs(tmp_table_param->items_to_copy, table->in_use))
return TRUE;
- for (Field **field=table->field ; *field ; field++)
- if ((*field)->is_real_null(0))
- return 0; // Don't count NULL
+ uchar *to= NULL, *orig_to= table->record[0] + table->s->null_bytes;
+ uint packed_length= 0;
+
+ if (is_distinct_packed())
+ {
+ orig_to= to= tree->get_packed_rec_ptr();
+ if ((to= make_packed_record(to)) == NULL)
+ return false;
+ packed_length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, packed_length);
+ }
+ else
+ {
+ for (Field **field=table->field ; *field ; field++)
+ if ((*field)->is_real_null(0))
+ return 0; // Don't count NULL
+ }
if (tree)
{
@@ -999,7 +1056,8 @@ bool Aggregator_distinct::add()
bloat the tree without providing any valuable info. Besides,
key_length used to initialize the tree didn't include space for them.
*/
- return tree->unique_add(table->record[0] + table->s->null_bytes);
+ return tree->unique_add(orig_to, tree->is_packed() ?
+ packed_length : tree->get_size());
}
if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))) &&
table->file->is_fatal_error(error, HA_CHECK_DUP))
@@ -1017,7 +1075,7 @@ bool Aggregator_distinct::add()
'0' values are also stored in the tree. This doesn't matter
for SUM(DISTINCT), but is important for AVG(DISTINCT)
*/
- return tree->unique_add(table->field[0]->ptr);
+ return tree->unique_add(table->field[0]->ptr, tree->get_size());
}
}
@@ -1778,6 +1836,48 @@ bool Aggregator_distinct::unique_walk_function_for_count(void *element)
}
+/*
+ @brief
+ Checks whether the unique tree is packed or not
+
+ @retval
+ TRUE tree stores packed values
+ FALSE otherwise
+*/
+
+bool Aggregator_distinct::is_distinct_packed()
+{
+ return tree && tree->is_packed();
+
+}
+
+
+/*
+ @brief
+ Make a record with packed values
+
+ @param
+ to buffer to store the packed value
+*/
+uchar *Aggregator_distinct::make_packed_record(uchar *to)
+{
+ to+= Unique::size_of_length_field;
+
+ for (Field **field=table->field ; *field ; field++)
+ {
+ Field *fld= (*field);
+ if (fld->is_real_null(0))
+ return NULL; // Don't count NULL
+
+ /*
+ Storing the packed values for the non-const fields of the record
+ */
+ to+= fld->make_packed_record_field(to);
+ }
+ return to;
+}
+
+
Aggregator_distinct::~Aggregator_distinct()
{
if (tree)
@@ -3612,6 +3712,33 @@ int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
/**
+ Compares the packed values of the fields in the expression list of GROUP_CONCAT.
+ @note
+
+ GROUP_CONCAT([DISTINCT] expr [,expr ...]
+ [ORDER BY {unsigned_integer | col_name | expr}
+ [ASC | DESC] [,col_name ...]]
+ [SEPARATOR str_val])
+
+ @return
+ @retval -1 : key1 < key2
+ @retval 0 : key1 = key2
+ @retval 1 : key1 > key2
+*/
+int group_concat_packed_key_cmp_with_distinct(void *arg,
+ const void *a_ptr,
+ const void *b_ptr)
+{
+ Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
+
+ DBUG_ASSERT(item_func->unique_filter);
+ uchar *a= (uchar*)a_ptr;
+ uchar *b= (uchar*)b_ptr;
+ return item_func->unique_filter->compare_packed_keys(a, b);
+}
+
+
+/**
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
*/
@@ -3778,10 +3905,21 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
String tmp((char *)table->record[1], table->s->reclength,
default_charset_info);
String tmp2;
- uchar *key= (uchar *) key_arg;
+ const uchar *key= (const uchar *) key_arg;
+ const uchar *key_end= NULL;
String *result= &item->result;
Item **arg= item->args, **arg_end= item->args + item->arg_count_field;
uint old_length= result->length();
+ SORT_FIELD *pos;
+
+ const bool packed= item->is_distinct_packed();
+
+ if (packed)
+ {
+ pos= item->unique_filter->get_sortorder();
+ key_end= key + item->unique_filter->get_full_size();
+ key+= Unique::size_of_length_field;
+ }
ulonglong *offset_limit= &item->copy_offset_limit;
ulonglong *row_limit = &item->copy_row_limit;
@@ -3822,11 +3960,43 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
Field *field= (*arg)->get_tmp_table_field();
if (field)
{
- uint offset= (field->offset(field->table->record[0]) -
- table->s->null_bytes);
- DBUG_ASSERT(offset < table->s->reclength);
- res= item->get_str_from_field(*arg, field, &tmp, key,
- offset + item->get_null_bytes());
+ if (packed)
+ {
+ DBUG_ASSERT(pos->field == field);
+ if (!item->skip_nulls() && field->maybe_null())
+ {
+ if (*key == 0)
+ {
+ // Case with NULL value
+ field->set_null();
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key++;
+ }
+ else
+ {
+ // Case with NOT NULL value
+ field->set_notnull();
+ const uchar *end= field->unpack(field->ptr, key+1, key_end, 0);
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key= end;
+ }
+ }
+ else
+ {
+ const uchar *end= field->unpack(field->ptr, key, key_end, 0);
+ res= item->get_str_from_field(*arg, field, &tmp);
+ key= end;
+ }
+ pos++;
+ }
+ else
+ {
+ uint offset= (field->offset(field->table->record[0]) -
+ table->s->null_bytes);
+ DBUG_ASSERT(offset < table->s->reclength);
+ res= item->get_str_from_field(*arg, field, &tmp, key,
+ offset + item->get_null_bytes());
+ }
}
else
res= item->get_str_from_item(*arg, &tmp);
@@ -4137,6 +4307,14 @@ bool Item_func_group_concat::add(bool exclude_nulls)
size_t row_str_len= 0;
StringBuffer<MAX_FIELD_WIDTH> buf;
String *res;
+ uchar *to= NULL, *orig_to= get_record_pointer();
+
+ if (is_distinct_packed())
+ {
+ orig_to= to= unique_filter->get_packed_rec_ptr();
+ to+= Unique::size_of_length_field;
+ }
+
for (uint i= 0; i < arg_count_field; i++)
{
Item *show_item= args[i];
@@ -4151,6 +4329,24 @@ bool Item_func_group_concat::add(bool exclude_nulls)
return 0; // Skip row if it contains null
if (tree && (res= field->val_str(&buf)))
row_str_len+= res->length();
+
+ /*
+ Storing the packed values for the non-const fields of the record
+ */
+ if (is_distinct_packed())
+ {
+ if (!exclude_nulls && field->maybe_null())
+ {
+ if (field->is_null_in_record((const uchar*) table->record[0]))
+ {
+ *to++=0;
+ continue;
+ }
+ *to++=1;
+ }
+ uchar* end= field->pack(to, field->ptr);
+ to+= static_cast<uint>(end - to);
+ }
}
else
{
@@ -4167,9 +4363,17 @@ bool Item_func_group_concat::add(bool exclude_nulls)
if (distinct)
{
+ DBUG_ASSERT(unique_filter);
+ uint packed_length= unique_filter->get_size();
+
+ if (unique_filter->is_packed())
+ {
+ packed_length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, packed_length);
+ }
/* Filter out duplicate rows. */
uint count= unique_filter->elements_in_tree();
- unique_filter->unique_add(get_record_pointer());
+ unique_filter->unique_add(get_record_pointer(), packed_length);
if (count == unique_filter->elements_in_tree())
row_eligible= FALSE;
}
@@ -4285,16 +4489,22 @@ bool Item_func_group_concat::setup(THD *thd)
/* Push all not constant fields to the list and create a temp table */
always_null= 0;
+ uint non_const_items= 0;
for (uint i= 0; i < arg_count_field; i++)
{
Item *item= args[i];
if (list.push_back(item, thd->mem_root))
DBUG_RETURN(TRUE);
- if (item->const_item() && item->is_null() && skip_nulls())
+ if (item->const_item())
{
- always_null= 1;
- DBUG_RETURN(FALSE);
+ if (item->is_null() && skip_nulls())
+ {
+ always_null= 1;
+ DBUG_RETURN(FALSE);
+ }
}
+ else
+ non_const_items++;
}
List<Item> all_fields(list);
@@ -4377,6 +4587,7 @@ bool Item_func_group_concat::setup(THD *thd)
the row is not added to the result.
*/
uint tree_key_length= table->s->reclength - table->s->null_bytes;
+ bool allow_packing= is_packing_allowed(&tree_key_length);
if (arg_count_order)
{
@@ -4395,10 +4606,16 @@ bool Item_func_group_concat::setup(THD *thd)
}
if (distinct)
- unique_filter= new Unique(get_comparator_function_for_distinct(),
+ {
+ unique_filter= new Unique(get_comparator_function_for_distinct(allow_packing),
(void*)this,
tree_key_length + get_null_bytes(),
- ram_limitation(thd));
+ ram_limitation(thd), 0, allow_packing);
+
+ if (!unique_filter || unique_filter->setup(thd, this, non_const_items,
+ arg_count_field, skip_nulls()))
+ DBUG_RETURN(TRUE);
+ }
if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
(offset_limit && offset_limit->cmp_type() != INT_RESULT))
{
@@ -4456,11 +4673,13 @@ String* Item_func_group_concat::val_str(String* str)
Get the comparator function for DISTINT clause
*/
-qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct()
+qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct(bool packed)
{
- return skip_nulls() ?
- group_concat_key_cmp_with_distinct :
- group_concat_key_cmp_with_distinct_with_nulls;
+ return packed ?
+ group_concat_packed_key_cmp_with_distinct :
+ (skip_nulls() ?
+ group_concat_key_cmp_with_distinct :
+ group_concat_key_cmp_with_distinct_with_nulls);
}
@@ -4488,9 +4707,11 @@ qsort_cmp2 Item_func_group_concat::get_comparator_function_for_order_by()
uchar* Item_func_group_concat::get_record_pointer()
{
- return skip_nulls() ?
- table->record[0] + table->s->null_bytes :
- table->record[0];
+ return is_distinct_packed() ?
+ unique_filter->get_packed_rec_ptr() :
+ (skip_nulls() ?
+ table->record[0] + table->s->null_bytes :
+ table->record[0]);
}
@@ -4511,6 +4732,102 @@ uint Item_func_group_concat::get_null_bytes()
}
+/*
+ @brief
+ Checks whether the unique tree is packed or not
+
+ @retval
+ TRUE tree stores packed values
+ FALSE otherwise
+*/
+
+bool Item_func_group_concat::is_distinct_packed()
+{
+ return unique_filter && unique_filter->is_packed();
+}
+
+
+/*
+ @brief
+ Checks if one can store packed values in the Unique tree
+
+ @param
+ total_length [OUT] length of the key in the tree
+
+ @retval
+ TRUE packing allowed
+ FALSE packing not allowed
+*/
+
+bool Item_func_group_concat::is_packing_allowed(uint* total_length)
+{
+ /*
+ TODO varun:
+ Currently Unique is not packed if ORDER BY clause is used
+ This needs to be implemented when MDEV-22089 is fixed
+ */
+ if (!distinct || arg_count_order)
+ return false;
+
+ return Item_sum::is_packing_allowed(table, total_length);
+}
+
+
+/*
+ @brief
+ Check if storing packed values inside the Unique tree is allowed
+
+ @param table Table structure
+ @param total_length [OUT] max length of the packed key (taking the
+ length bytes into account)
+*/
+
+
+bool Item_sum::is_packing_allowed(TABLE *table, uint* total_length)
+{
+ uint size_of_packable_fields= 0;
+ uint tot_length= 0;
+ for (uint i= 0; i < get_arg_count(); i++)
+ {
+ Item *item= args[i];
+ if (item->const_item())
+ continue;
+ Field *field= item->get_tmp_table_field();
+ if (field)
+ {
+ /*
+ For blob columns packing is not allowed
+ */
+ if (field->flags & BLOB_FLAG)
+ return false;
+
+ tot_length+= field->sort_length_without_suffix();
+ if (field->is_packable())
+ {
+ size_of_packable_fields+=
+ number_storage_requirement(field->sort_length_without_suffix());
+ }
+ }
+ }
+
+ // All fields are of fixed size
+ if (size_of_packable_fields == 0)
+ return false;
+
+ /*
+ TODO varun: we can introduce a heuristic here, no need to do packing if we
+ have small VARCHAR or CHAR
+ */
+ *total_length= tot_length;
+ /*
+ Unique::size_of_length_field is the number of bytes used to store the packed length
+ for each record inserted in the Unique tree
+ */
+ (*total_length)+= Unique::size_of_length_field + size_of_packable_fields;
+ return true;
+}
+
+
void Item_func_group_concat::print(String *str, enum_query_type query_type)
{
str->append(func_name());
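Editor's note: both Aggregator_distinct::make_packed_record() and the GROUP_CONCAT add() path shown above build the same record shape before calling unique_add(): a 4-byte length field, then, for each non-constant argument, an optional NULL-marker byte (only when NULLs are kept and the field is nullable) followed by the Field::pack() image. A self-contained sketch of that layout; the per-field encodings (1-byte length prefix for a short VARCHAR, raw 4-byte int) are assumptions standing in for Field::pack():

#include <cstdint>
#include <cstring>
#include <cstdio>

static const uint32_t size_of_length_field= 4;   // Unique::size_of_length_field

static uint32_t pack_int(unsigned char *to, int32_t v)
{
  memcpy(to, &v, 4);                              // fixed-size field image
  return 4;
}

static uint32_t pack_varchar(unsigned char *to, const char *s)
{
  uint32_t len= (uint32_t) strlen(s);
  to[0]= (unsigned char) len;                     // 1-byte length prefix assumed
  memcpy(to + 1, s, len);
  return 1 + len;
}

// Build one packed record for (nullable VARCHAR, NOT NULL INT).
static uint32_t make_packed_record(unsigned char *rec, const char *name,
                                   int32_t id)
{
  unsigned char *to= rec + size_of_length_field;  // leave room for the length
  if (name == nullptr)
    *to++= 0;                                     // NULL marker, no value follows
  else
  {
    *to++= 1;                                     // NOT NULL marker
    to+= pack_varchar(to, name);
  }
  to+= pack_int(to, id);
  uint32_t total= (uint32_t) (to - rec);
  uint32_t payload= total - size_of_length_field; // store_packed_length() keeps
  memcpy(rec, &payload, 4);                       // only the payload length
  return total;                                   // value passed to unique_add()
}

int main()
{
  unsigned char rec[64];
  printf("record length: %u bytes\n",
         make_packed_record(rec, "abc", 42));     // 4 + 1 + 4 + 4 = 13
  return 0;
}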
diff --git a/sql/item_sum.h b/sql/item_sum.h
index dc520ce2578..39d5b79c1c4 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -590,6 +590,7 @@ public:
bool with_sum_func() const { return true; }
virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); }
+ bool is_packing_allowed(TABLE* table, uint* total_length);
};
@@ -696,7 +697,10 @@ public:
bool unique_walk_function(void *element);
bool unique_walk_function_for_count(void *element);
+ bool is_distinct_packed();
static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
+ static int composite_packed_key_cmp(void* arg, uchar* key1, uchar* key2);
+ uchar* make_packed_record(uchar *to);
};
@@ -1920,6 +1924,9 @@ protected:
friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
const void* key1,
const void* key2);
+ friend int group_concat_packed_key_cmp_with_distinct(void *arg,
+ const void *key1,
+ const void *key2);
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2);
friend int group_concat_key_cmp_with_order_with_nulls(void *arg,
@@ -1941,6 +1948,9 @@ protected:
virtual String *get_str_from_field(Item *i, Field *f, String *tmp,
const uchar *key, size_t offset)
{ return f->val_str(tmp, key + offset); }
+ virtual String *get_str_from_field(Item *i, Field *f, String *tmp)
+ { return f->val_str(tmp); }
+
virtual void cut_max_length(String *result,
uint old_length, uint max_length) const;
public:
@@ -2015,11 +2025,12 @@ public:
{ context= (Name_resolution_context *)cntx; return FALSE; }
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_group_concat>(thd, this); }
- qsort_cmp2 get_comparator_function_for_distinct();
+ qsort_cmp2 get_comparator_function_for_distinct(bool packed);
qsort_cmp2 get_comparator_function_for_order_by();
uchar* get_record_pointer();
uint get_null_bytes();
-
+ bool is_distinct_packed();
+ bool is_packing_allowed(uint* total_length);
};
#endif /* ITEM_SUM_INCLUDED */
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 80d67d884bf..7d65c85129b 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -11772,7 +11772,7 @@ int read_keys_and_merge_scans(THD *thd,
}
DBUG_ASSERT(file->ref_length == unique->get_size());
- DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size());
+ DBUG_ASSERT(thd->variables.sortbuff_size <= unique->get_max_in_memory_size());
for (;;)
{
@@ -11815,7 +11815,7 @@ int read_keys_and_merge_scans(THD *thd,
continue;
cur_quick->file->position(cur_quick->record);
- if (unique->unique_add((char*)cur_quick->file->ref))
+ if (unique->unique_add((char*)cur_quick->file->ref, unique->get_size()))
goto err;
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 58a786ed33c..c158d7c5a14 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -6454,6 +6454,7 @@ struct SORT_FIELD: public SORT_FIELD_ATTR
Field *field; /* Field to sort */
Item *item; /* Item if not sorting fields */
bool reverse; /* if descending sort */
+ void setup(Field *fld, bool exclude_nulls, bool with_suffix);
};
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 5942efcd601..db5b5b777a8 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -714,7 +714,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
{
table->file->position(table->record[0]);
if (unlikely((error=
- deltempfile->unique_add((char*) table->file->ref))))
+ deltempfile->unique_add((char*) table->file->ref,
+ deltempfile->get_size()))))
{
error= 1;
goto terminate_delete;
@@ -1350,7 +1351,8 @@ int multi_delete::send_data(List<Item> &values)
}
else
{
- error=tempfiles[secure_counter]->unique_add((char*) table->file->ref);
+ uint size= tempfiles[secure_counter]->get_size();
+ error=tempfiles[secure_counter]->unique_add((char*) table->file->ref, size);
if (unlikely(error))
{
error= 1; // Fatal error
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 40f0c5ede5f..d512a9727b6 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -310,6 +310,8 @@ public:
sort_length+= len;
}
+ int compare_keys(uchar *a, uchar *b);
+
static const uint size_of_length_field= 4;
private:
@@ -567,8 +569,8 @@ public:
bool using_packed_sortkeys() const
{
- DBUG_ASSERT(m_using_packed_sortkeys ==
- (sort_keys != NULL && sort_keys->using_packed_sortkeys()));
+ DBUG_ASSERT(sort_keys == NULL ||
+ (m_using_packed_sortkeys == sort_keys->using_packed_sortkeys()));
return m_using_packed_sortkeys;
}
@@ -578,6 +580,11 @@ public:
return addon_fields != NULL;
}
+ void set_using_packed_keys(bool val)
+ {
+ m_using_packed_sortkeys= val;
+ }
+
uint32 get_result_length(uchar *plen)
{
if (!m_using_packed_addons)
@@ -659,6 +666,12 @@ public:
{
return m_packed_format;
}
+ void set_packed_format(bool val)
+ {
+ m_packed_format= val;
+ }
+
+ uint32 get_record_length_for_unique(uchar *to, uint size_of_dupl_count);
private:
uint m_packable_length;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 879955af723..bf6cebbf7e8 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -1570,7 +1570,16 @@ public:
return 0;
if (count > bucket_capacity * (curr_bucket + 1))
{
- column->store_field_value((uchar *) elem, col_length);
+ uchar *to= (uchar* )elem;
+ if (column->is_packable())
+ {
+ column->unpack(column->ptr,
+ to + Unique::size_of_length_field,
+ to + Unique::read_packed_length(to), 0);
+ }
+ else
+ column->store_field_value(to, col_length);
+
histogram->set_value(curr_bucket,
column->pos_in_interval(min_value, max_value));
curr_bucket++;
@@ -1646,13 +1655,11 @@ public:
of the parameters to be passed to the constructor of the Unique object.
*/
- Count_distinct_field(Field *field, size_t max_heap_table_size)
+ Count_distinct_field(Field *field)
{
table_field= field;
- tree_key_length= field->pack_length();
-
- tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field,
- tree_key_length, max_heap_table_size, 1);
+ tree_key_length= 0;
+ tree= NULL;
}
virtual ~Count_distinct_field()
@@ -1670,6 +1677,48 @@ public:
return (tree != NULL);
}
+
+ /*
+ @brief
+ Create and setup the Unique object for the column
+
+ @param
+ thd Thread structure
+ max_heap_table_size max allowed size of the unique tree
+ */
+ bool setup(THD *thd, size_t max_heap_table_size)
+ {
+ if (table_field->is_packable())
+ {
+ tree_key_length= table_field->max_packed_col_length(table_field->pack_length());
+
+ tree_key_length+= Unique::size_of_length_field;
+ tree= new Unique((qsort_cmp2) simple_packed_str_key_cmp, (void*) this,
+ tree_key_length, max_heap_table_size, 1, TRUE);
+ }
+ else
+ {
+ if (table_field->type() == MYSQL_TYPE_BIT)
+ {
+ tree_key_length= sizeof(ulonglong);
+ tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
+ (void*) &tree_key_length,
+ tree_key_length, max_heap_table_size, 1, FALSE);
+ }
+ else
+ {
+ tree_key_length= table_field->pack_length();
+ tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) table_field,
+ tree_key_length, max_heap_table_size, 1, FALSE);
+ }
+ }
+ if (!tree)
+ return true; // OOM
+
+ return tree->setup(thd, table_field);
+ }
+
+
/*
@brief
Add the value of 'field' to the container of the Unique object 'tree'
@@ -1677,7 +1726,20 @@ public:
virtual bool add()
{
table_field->mark_unused_memory_as_defined();
- return tree->unique_add(table_field->ptr);
+ uchar *orig_to= table_field->ptr;
+ DBUG_ASSERT(tree);
+
+ uint length= tree->get_size();
+ if (tree->is_packed())
+ {
+ uchar *to;
+ orig_to= to= tree->get_packed_rec_ptr();
+ to+= Unique::size_of_length_field;
+ to+= table_field->make_packed_record_field(to);
+ length= static_cast<uint>(to - orig_to);
+ Unique::store_packed_length(orig_to, length);
+ }
+ return tree->unique_add(orig_to, length);
}
/*
@@ -1733,16 +1795,31 @@ public:
return table_field->collected_stats->histogram.get_values();
}
+ static int simple_packed_str_key_cmp(void* arg, uchar* key1, uchar* key2);
+ static int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2);
+
};
-static
-int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2)
+int Count_distinct_field::simple_ulonglong_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
{
ulonglong *val1= (ulonglong *) key1;
ulonglong *val2= (ulonglong *) key2;
return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1;
}
+
+
+/*
+ @brief
+ Compare function for packed keys
+*/
+int Count_distinct_field::simple_packed_str_key_cmp(void* arg,
+ uchar* key1, uchar* key2)
+{
+ Count_distinct_field *compare_arg= (Count_distinct_field*)arg;
+ return compare_arg->tree->compare_packed_keys(key1, key2);
+}
/*
@@ -1755,20 +1832,12 @@ class Count_distinct_field_bit: public Count_distinct_field
{
public:
- Count_distinct_field_bit(Field *field, size_t max_heap_table_size)
- {
- table_field= field;
- tree_key_length= sizeof(ulonglong);
-
- tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
- (void*) &tree_key_length,
- tree_key_length, max_heap_table_size, 1);
- }
+ Count_distinct_field_bit(Field *field): Count_distinct_field(field){}
bool add()
{
longlong val= table_field->val_int();
- return tree->unique_add(&val);
+ return tree->unique_add(&val, tree->get_size());
}
};
@@ -2344,8 +2413,11 @@ void Column_statistics_collected::init(THD *thd, Field *table_field)
{
count_distinct=
table_field->type() == MYSQL_TYPE_BIT ?
- new Count_distinct_field_bit(table_field, max_heap_table_size) :
- new Count_distinct_field(table_field, max_heap_table_size);
+ new Count_distinct_field_bit(table_field) :
+ new Count_distinct_field(table_field);
+
+ if (count_distinct && count_distinct->setup(thd, max_heap_table_size))
+ count_distinct= NULL;
}
if (count_distinct && !count_distinct->exists())
count_distinct= NULL;
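Editor's note: Count_distinct_field::setup() above now chooses the tree key length and comparator per column: packable columns get a packed Unique sized to max_packed_col_length() plus the 4-byte length field, BIT columns keep the ulonglong key, and everything else keeps the fixed pack_length(). A rough sketch of that decision; the numbers below are illustrative assumptions:

#include <cstdint>
#include <cstdio>

static const uint32_t size_of_length_field= 4;   // Unique::size_of_length_field

struct ColumnInfo
{
  bool     is_packable;        // e.g. VARCHAR/CHAR that can use packed keys
  bool     is_bit;             // MYSQL_TYPE_BIT
  uint32_t pack_length;        // fixed storage size of the column
  uint32_t max_packed_length;  // Field::max_packed_col_length(pack_length)
};

static uint32_t tree_key_length_for(const ColumnInfo &col, bool *packed)
{
  if (col.is_packable)
  {
    *packed= true;
    return col.max_packed_length + size_of_length_field;
  }
  *packed= false;
  return col.is_bit ? (uint32_t) sizeof(unsigned long long) : col.pack_length;
}

int main()
{
  bool packed;
  ColumnInfo varchar_col= { true, false, 102, 101 };
  printf("varchar: key_length=%u packed=%d\n",
         tree_key_length_for(varchar_col, &packed), (int) packed);
  ColumnInfo bit_col= { false, true, 2, 2 };
  printf("bit: key_length=%u packed=%d\n",
         tree_key_length_for(bit_col, &packed), (int) packed);
  return 0;
}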
diff --git a/sql/uniques.cc b/sql/uniques.cc
index a0cebe3e4dd..d8772f366fb 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -48,12 +48,14 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique)
when tree implementation chooses to store pointer to key in TREE_ELEMENT
(instead of storing the element itself there)
*/
- return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
+ return (unique->is_packed() ?
+ my_b_write(&unique->file, key, Unique::read_packed_length(key)) :
+ my_b_write(&unique->file, key, unique->size)) ? 1 : 0;
}
int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique)
{
- return my_b_write(&unique->file, key, unique->size) ||
+ return unique_write_to_file(key, count, unique) ||
my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0;
}
@@ -79,10 +81,14 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
uint size_arg, size_t max_in_memory_size_arg,
- uint min_dupl_count_arg)
+ uint min_dupl_count_arg, bool packed_arg)
:max_in_memory_size(max_in_memory_size_arg),
size(size_arg),
+ packed(packed_arg),
+ memory_used(0), packed_rec_ptr(NULL),
+ sortorder(NULL), sort_keys(NULL),
elements(0)
+
{
my_b_clear(&file);
min_dupl_count= min_dupl_count_arg;
@@ -90,18 +96,32 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
if (min_dupl_count_arg)
full_size+= sizeof(element_count);
with_counters= MY_TEST(min_dupl_count_arg);
- init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func,
+ init_tree(&tree, (max_in_memory_size / 16), 0, 0, comp_func,
NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
/* If the following fail's the next add will also fail */
my_init_dynamic_array(PSI_INSTRUMENT_ME, &file_ptrs, sizeof(Merge_chunk), 16,
16, MYF(MY_THREAD_SPECIFIC));
+
+ if (packed)
+ {
+ packed_rec_ptr= (uchar *)my_malloc(PSI_INSTRUMENT_ME,
+ size,
+ MYF(MY_WME | MY_THREAD_SPECIFIC));
+ }
/*
If you change the following, change it in get_max_elements function, too.
*/
max_elements= (ulong) (max_in_memory_size /
ALIGN_SIZE(sizeof(TREE_ELEMENT)+size));
if (!max_elements)
+ {
max_elements= 1;
+ /*
+ Need to ensure that we have memory to store at least one record
+ in the Unique tree
+ */
+ max_in_memory_size= sizeof(TREE_ELEMENT) + size;
+ }
(void) open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
MYF(MY_WME));
@@ -371,6 +391,7 @@ Unique::~Unique()
close_cached_file(&file);
delete_tree(&tree, 0);
delete_dynamic(&file_ptrs);
+ my_free(packed_rec_ptr);
}
@@ -389,6 +410,10 @@ bool Unique::flush()
(void*) this, left_root_right) ||
insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
return 1;
+ /**
+ The tree gets reset, so make sure the memory-used counter is reset too
+ */
+ memory_used= 0;
delete_tree(&tree, 0);
return 0;
}
@@ -495,7 +520,8 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
uint key_length, Merge_chunk *begin, Merge_chunk *end,
tree_walk_action walk_action, void *walk_action_arg,
qsort_cmp2 compare, void *compare_arg,
- IO_CACHE *file, bool with_counters)
+ IO_CACHE *file, bool with_counters,
+ uint min_dupl_count, bool packed)
{
BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
QUEUE queue;
@@ -521,7 +547,12 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
// read_to_buffer() needs only rec_length.
Sort_param sort_param;
sort_param.rec_length= key_length;
+ sort_param.sort_length= key_length;
+ sort_param.min_dupl_count= min_dupl_count;
+ DBUG_ASSERT(sort_param.res_length == 0);
DBUG_ASSERT(!sort_param.using_addon_fields());
+ sort_param.set_using_packed_keys(packed);
+ uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0;
/*
Invariant: queue must contain top element from each tree, until a tree
@@ -534,7 +565,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
top->set_buffer(merge_buffer + (top - begin) * piece_size,
merge_buffer + (top - begin) * piece_size + piece_size);
top->set_max_keys(max_key_count_per_piece);
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
DBUG_ASSERT(bytes_read);
@@ -555,6 +586,10 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
read next key from the cache or from the file and push it to the
queue; this gives new top.
*/
+ key_length= sort_param.get_record_length_for_unique((uchar*)old_key,
+ size_of_dupl_count);
+
+ cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
top->advance_current_key(key_length);
top->decrement_mem_count();
if (top->mem_count())
@@ -564,7 +599,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
/* save old_key not to overwrite it in read_to_buffer */
memcpy(save_key_buff, old_key, key_length);
old_key= save_key_buff;
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
else if (bytes_read) /* top->key, top->mem_count are reset */
@@ -604,15 +639,18 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
{
do
{
-
+ key_length= sort_param.get_record_length_for_unique(top->current_key(),
+ size_of_dupl_count);
+ cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
cnt= with_counters ?
get_counter_from_merged_element(top->current_key(), cnt_ofs) : 1;
if (walk_action(top->current_key(), cnt, walk_action_arg))
goto end;
+
top->advance_current_key(key_length);
}
while (top->decrement_mem_count());
- bytes_read= read_to_buffer(file, top, &sort_param, false);
+ bytes_read= read_to_buffer(file, top, &sort_param, packed);
if (unlikely(bytes_read == (ulong) -1))
goto end;
}
@@ -678,7 +716,8 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
(Merge_chunk *) file_ptrs.buffer,
(Merge_chunk *) file_ptrs.buffer + file_ptrs.elements,
action, walk_action_arg,
- tree.compare, tree.custom_arg, &file, with_counters);
+ tree.compare, tree.custom_arg, &file, with_counters,
+ min_dupl_count, is_packed());
}
my_free(merge_buffer);
return res;
@@ -730,9 +769,11 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size,
sort_param.max_keys_per_buffer=
(uint) MY_MAX((max_in_memory_size / sort_param.sort_length), MERGEBUFF2);
sort_param.not_killable= 1;
+ sort_param.set_using_packed_keys(is_packed());
+ sort_param.set_packed_format(is_packed());
sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer *
- sort_param.sort_length);
+ sort_param.sort_length);
sort_param.compare= (qsort2_cmp) buffpek_compare;
sort_param.cmp_context.key_compare= tree.compare;
@@ -760,7 +801,8 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size,
file_ptrs.elements= maxbuffer+1;
return 0;
}
- if (merge_index(&sort_param, Bounds_checked_array<uchar>(buff, buff_size),
+ if (merge_index(&sort_param,
+ Bounds_checked_array<uchar>(buff, buff_size),
file_ptr, maxbuffer, &file, outfile))
goto err;
error= 0;
@@ -789,6 +831,8 @@ bool Unique::get(TABLE *table)
sort.return_rows= elements+tree.elements_in_tree;
DBUG_ENTER("Unique::get");
+ DBUG_ASSERT(packed == FALSE);
+
if (my_b_tell(&file) == 0)
{
/* Whole tree is in memory; Don't use disk if you don't need to */
@@ -831,3 +875,114 @@ err:
my_free(sort_buffer);
DBUG_RETURN(rc);
}
+
+
+/*
+ @brief
+ Setup the structures that are used when Unique stores packed values
+
+ @param thd thread structure
+ @param item item of aggregate function
+ @param non_const_args number of non-constant arguments
+ @param arg_count total number of arguments
+ @param exclude_nulls TRUE: NULLs should be excluded
+ FALSE: otherwise
+
+ @note
+ This implementation is used by GROUP_CONCAT, as it can have more than
+ one argument in the argument list.
+
+ @retval
+ TRUE error
+ FALSE setup successful
+*/
+
+bool
+Unique::setup(THD *thd, Item_sum *item, uint non_const_args,
+ uint arg_count, bool exclude_nulls)
+{
+ if (!packed) // no packing so don't create the sortorder list
+ return false;
+ SORT_FIELD *sort,*pos;
+ if (sortorder)
+ return false;
+ DBUG_ASSERT(sort_keys == NULL);
+ sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * non_const_args);
+ pos= sort= sortorder;
+ if (!pos)
+ return true;
+ sort_keys= new Sort_keys(sortorder, non_const_args);
+ if (!sort_keys)
+ return true;
+ sort=pos= sortorder;
+ for (uint i= 0; i < arg_count; i++)
+ {
+ Item *arg= item->get_arg(i);
+ if (arg->const_item())
+ continue;
+ Field *field= arg->get_tmp_table_field();
+ if (!field)
+ continue;
+
+ pos->setup(field, exclude_nulls, false);
+ pos++;
+ }
+ return false;
+}
+
+
+/*
+ @brief
+ Setup the structures that are used when Unique stores packed values
+
+ @param thd thread structure
+ @param field field structure
+
+ @retval
+ TRUE error
+ FALSE setup successful
+*/
+
+bool Unique::setup(THD *thd, Field *field)
+{
+ if (!packed) // no packing so don't create the sortorder list
+ return false;
+
+ SORT_FIELD *sort,*pos;
+ if (sortorder)
+ return false;
+
+ DBUG_ASSERT(sort_keys == NULL);
+ sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD));
+ pos= sort= sortorder;
+ if (!pos)
+ return true;
+ sort_keys= new Sort_keys(sortorder, 1);
+ if (!sort_keys)
+ return true;
+ sort=pos= sortorder;
+ pos->setup(field, true, false); // Nulls are always excluded
+
+ return false;
+}
+
+
+/*
+ @brief
+ Compare two packed keys inside the Unique tree
+
+ @param a_ptr packed sort key
+ @param b_ptr packed sort key
+
+ @retval
+ >0 key a_ptr greater than b_ptr
+ =0 key a_ptr equal to b_ptr
+ <0 key a_ptr less than b_ptr
+
+*/
+
+int Unique::compare_packed_keys(uchar *a_ptr, uchar *b_ptr)
+{
+ return sort_keys->compare_keys(a_ptr + Unique::size_of_length_field,
+ b_ptr + Unique::size_of_length_field);
+}
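Editor's note: together with the unique_add() changes in uniques.h below, Unique switches from counting elements against max_elements to accounting for bytes: each inserted key charges its size plus the per-node tree overhead against max_in_memory_size, and the in-memory tree is flushed once the next record would not fit. A self-contained sketch of that accounting, with the node overhead as an assumed constant (the real code only charges memory when a new, non-duplicate element is actually inserted; duplicates are ignored here):

#include <cstddef>
#include <cstdio>

// Assumed stand-in for sizeof(TREE_ELEMENT) + tree.size_of_element.
static const size_t node_overhead= 32;

struct UniqueMemorySketch
{
  size_t max_in_memory_size;
  size_t memory_used;
  size_t elements_in_tree;

  size_t space_left() const { return max_in_memory_size - memory_used; }

  // Mirrors Unique::is_full(): always keep at least one element in memory.
  bool is_full(size_t rec_size) const
  { return elements_in_tree != 0 && rec_size > space_left(); }

  void flush()                       // stands in for writing the tree to disk
  { memory_used= 0; elements_in_tree= 0; }

  void add(size_t key_size)
  {
    size_t rec_size= key_size + node_overhead;
    if (is_full(rec_size))
      flush();
    memory_used+= rec_size;          // charged when the element is inserted
    elements_in_tree++;
  }
};

int main()
{
  UniqueMemorySketch u= { 200, 0, 0 };
  for (int i= 0; i < 6; i++)         // 6 keys of 20 bytes against a 200-byte cap
    u.add(20);
  printf("in tree: %zu, used: %zu bytes\n", u.elements_in_tree, u.memory_used);
  return 0;
}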
diff --git a/sql/uniques.h b/sql/uniques.h
index 7e12a391fbd..1ade09408bd 100644
--- a/sql/uniques.h
+++ b/sql/uniques.h
@@ -44,26 +44,86 @@ class Unique :public Sql_alloc
it to be written to record_pointers.
always 0 for unions, > 0 for intersections */
bool with_counters;
+ /*
+ TRUE : Value stored is of variable size
+ FALSE : Value stored is of fixed size
+ */
+ bool packed;
+ /*
+ size in bytes used for storing keys in the Unique tree
+ */
+ size_t memory_used;
+ /*
+ Buffer used to build one packed record; the packed value assembled here
+ is what gets added to the Unique tree
+ */
+ uchar* packed_rec_ptr;
+
+ /*
+ Array of SORT_FIELD structure storing the information about the key parts
+ in the sort key of the Unique tree
+ @see Unique::setup()
+ */
+ SORT_FIELD *sortorder;
+
+ /*
+ Structure describing the sort keys (wraps the sortorder array); used when
+ comparing packed keys
+ */
+ Sort_keys *sort_keys;
bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
bool flush();
+ // return the amount of unused memory in the Unique tree
+ size_t space_left()
+ {
+ DBUG_ASSERT(max_in_memory_size >= memory_used);
+ return max_in_memory_size - memory_used;
+ }
+
+ // Check if the Unique tree is full or not
+ bool is_full(size_t record_size)
+ {
+ if (!tree.elements_in_tree) // Always insert at least one element in the tree
+ return false;
+ return record_size > space_left();
+ }
+
public:
ulong elements;
SORT_INFO sort;
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
- uint size_arg, size_t max_in_memory_size_arg,
- uint min_dupl_count_arg= 0);
+ uint size_arg, size_t max_in_memory_size_arg,
+ uint min_dupl_count_arg= 0, bool packed_arg= false);
~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; }
- inline bool unique_add(void *ptr)
+
+ /*
+ TODO varun:
+ remove the unique_add function above and pass the size as a parameter.
+ So for fixed types we would pass the fixed size as argument.
+ */
+
+ inline bool unique_add(void *ptr, uint size_arg)
{
DBUG_ENTER("unique_add");
DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
- if (!(tree.flag & TREE_ONLY_DUPS) &&
- tree.elements_in_tree >= max_elements && flush())
+ TREE_ELEMENT *res;
+ size_t rec_size= size_arg + sizeof(TREE_ELEMENT) + tree.size_of_element;
+
+ if (!(tree.flag & TREE_ONLY_DUPS) && is_full(rec_size) && flush())
DBUG_RETURN(1);
- DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
+ uint count= tree.elements_in_tree;
+ res= tree_insert(&tree, ptr, size_arg, tree.custom_arg);
+ if (tree.elements_in_tree != count)
+ {
+ /*
+ increment memory used only when a unique element is inserted
+ in the tree
+ */
+ memory_used+= rec_size;
+ }
+ DBUG_RETURN(!res);
}
bool is_in_memory() { return (my_b_tell(&file) == 0); }
@@ -96,7 +156,42 @@ public:
bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
uint get_size() const { return size; }
+ uint get_full_size() const { return full_size; }
size_t get_max_in_memory_size() const { return max_in_memory_size; }
+ uchar *get_packed_rec_ptr() { return packed_rec_ptr; }
+ Sort_keys *get_keys() { return sort_keys; }
+ SORT_FIELD *get_sortorder() { return sortorder; }
+ bool is_count_stored() { return with_counters; }
+
+ // returns TRUE if the unique tree stores packed values
+ bool is_packed() { return packed; }
+
+ static void store_packed_length(uchar *p, uint sz)
+ {
+ int4store(p, sz - size_of_length_field);
+ }
+
+ // returns the length of the key along with the length bytes for the key
+ static uint read_packed_length(uchar *p)
+ {
+ return size_of_length_field + uint4korr(p);
+ }
+
+ bool setup(THD *thd, Item_sum *item, uint non_const_args,
+ uint arg_count, bool exclude_nulls);
+ bool setup(THD *thd, Field *field);
+ int compare_packed_keys(uchar *a, uchar *b);
+
+ /*
+ TODO varun:
+ Currently the size_of_length_field is set to 4 but we can also use 2 here,
+ as fields are always inserted in the unique tree either from the base table
+ or from the temp table.
+ This does not look possible, as the merge process
+ in merge buffers expects the length of dynamic sort keys to be stored in
+ 4 bytes; maybe this needs to be discussed
+ */
+ static const uint size_of_length_field= 4;
friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);