Diffstat (limited to 'sql')
-rw-r--r-- | sql/field.h           |  27
-rw-r--r-- | sql/filesort.cc       | 195
-rw-r--r-- | sql/item_jsonfunc.cc  |  82
-rw-r--r-- | sql/item_jsonfunc.h   |   1
-rw-r--r-- | sql/item_sum.cc       | 373
-rw-r--r-- | sql/item_sum.h        |  15
-rw-r--r-- | sql/opt_range.cc      |   4
-rw-r--r-- | sql/sql_class.h       |   1
-rw-r--r-- | sql/sql_delete.cc     |   6
-rw-r--r-- | sql/sql_sort.h        |  17
-rw-r--r-- | sql/sql_statistics.cc | 114
-rw-r--r-- | sql/uniques.cc        | 179
-rw-r--r-- | sql/uniques.h         | 107
13 files changed, 1005 insertions, 116 deletions
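Most of the changes below revolve around a single record framing: when the Unique tree stores variable-size ("packed") keys, every record begins with a 4-byte length field written by Unique::store_packed_length() and decoded by Unique::read_packed_length() (see the sql/uniques.h hunk at the end). The standalone sketch below models just that framing; int4store()/uint4korr() are the server's fixed-width little-endian helpers, emulated here with memcpy on the assumption of a little-endian host.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Mirrors Unique::size_of_length_field from the patch.
static const unsigned SIZE_OF_LENGTH_FIELD= 4;

// Stand-ins for the server's int4store()/uint4korr() helpers.
static void int4store_(unsigned char *p, uint32_t v)
{ std::memcpy(p, &v, 4); }
static uint32_t uint4korr_(const unsigned char *p)
{ uint32_t v; std::memcpy(&v, p, 4); return v; }

// The prefix stores the payload length only (total minus the prefix)...
static void store_packed_length(unsigned char *p, unsigned total)
{ int4store_(p, total - SIZE_OF_LENGTH_FIELD); }

// ...and the reader adds the prefix back to get the full record length.
static unsigned read_packed_length(const unsigned char *p)
{ return SIZE_OF_LENGTH_FIELD + uint4korr_(p); }

int main()
{
  unsigned char rec[32];
  unsigned char *to= rec + SIZE_OF_LENGTH_FIELD;  // reserve the prefix
  std::memcpy(to, "abcdefg", 7);                  // 7 bytes of packed fields
  to+= 7;
  store_packed_length(rec, (unsigned)(to - rec));
  assert(read_packed_length(rec) == SIZE_OF_LENGTH_FIELD + 7);
  return 0;
}
```

This is the layout every producer in the patch follows: reserve the prefix, append the packed field values, then call store_packed_length() with the final size.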
diff --git a/sql/field.h b/sql/field.h index e4c5ffcc0de..693d0b63ded 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1109,6 +1109,11 @@ public: */ virtual uint32 data_length() { return pack_length(); } virtual uint32 sort_length() const { return pack_length(); } + /* + returns the sort_length for a field without the suffix length bytes + for fields with binary charset. + */ + virtual uint32 sort_length_without_suffix() const { return pack_length(); } /* sort_suffix_length() return the length bytes needed to store the length @@ -1316,9 +1321,21 @@ public: } return update_fl; } + + /* + @brief + Make a packed value for a field + @param + to buffer to store the value + */ + virtual uint make_packed_record_field(uchar *to) + { + uchar* end= pack(to, ptr); + return static_cast<uint>(end - to); + } virtual void store_field_value(uchar *val, uint len) { - memcpy(ptr, val, len); + memcpy(ptr, val, len); } virtual uint decimals() const { return 0; } virtual Information_schema_numeric_attributes @@ -4122,6 +4139,10 @@ public: { return (uint32) field_length + sort_suffix_length(); } + uint32 sort_length_without_suffix() const override + { + return (uint32) field_length; + } virtual uint32 sort_suffix_length() const override { return (field_charset() == &my_charset_bin ? length_bytes : 0); @@ -4463,6 +4484,10 @@ public: { return (uint32) (packlength); } uint row_pack_length() const override { return pack_length_no_ptr(); } uint32 sort_length() const override; + uint32 sort_length_without_suffix() const override + { + return field_length; + } uint32 sort_suffix_length() const override; uint32 value_length() override { return get_length(); } virtual uint32 max_data_length() const override diff --git a/sql/filesort.cc b/sql/filesort.cc index ac43c96b0e0..e0389bb0343 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -35,6 +35,7 @@ #include "filesort_utils.h" #include "sql_select.h" #include "debug_sync.h" +#include "uniques.h" /* functions defined in this file */ @@ -1694,6 +1695,8 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, uint size_of_sort_length= param->using_packed_sortkeys() ? Sort_keys::size_of_length_field : 0; + uint size_of_dupl_count= param->min_dupl_count ? + sizeof(element_count) : 0; for (; ix < count; ++ix) { @@ -1709,14 +1712,16 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, buffpek->buffer_end()) break; // Incomplete record. - uchar *plen= record + sort_length; + uchar *plen= record + sort_length + size_of_dupl_count; + uint res_length= param->get_result_length(plen); if (plen + res_length > buffpek->buffer_end()) break; // Incomplete record. - DBUG_ASSERT(res_length > 0); + DBUG_ASSERT(!param->sort_keys || res_length > 0); DBUG_ASSERT(sort_length + res_length <= param->rec_length); record+= sort_length; record+= res_length; + record+= size_of_dupl_count; } DBUG_ASSERT(ix > 0); count= ix; @@ -1812,12 +1817,12 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, rec_length= param->rec_length; res_length= param->res_length; sort_length= param->sort_length; - uint dupl_count_ofs= rec_length-sizeof(element_count); uint min_dupl_count= param->min_dupl_count; + uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0; + bool check_dupl_count= flag && min_dupl_count; offset= (rec_length- (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length); - uint wr_len= flag ? res_length : rec_length; uint wr_offset= flag ? 
offset : 0; const bool using_packed_sortkeys= param->using_packed_sortkeys(); @@ -1867,10 +1872,16 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, Store it also in 'to_file'. */ buffpek= (Merge_chunk*) queue_top(&queue); + rec_length= param->get_record_length_for_unique(buffpek->current_key(), + size_of_dupl_count); + + DBUG_ASSERT(rec_length <= param->sort_length); + memcpy(unique_buff, buffpek->current_key(), rec_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); if (min_dupl_count) - memcpy(&dupl_count, unique_buff+dupl_count_ofs, - sizeof(dupl_count)); + memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count)); + buffpek->advance_current_key(rec_length); buffpek->decrement_mem_count(); if (buffpek->mem_count() == 0) @@ -1900,28 +1911,31 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, src= buffpek->current_key(); if (cmp) // Remove duplicates { - uchar *current_key= buffpek->current_key(); - if (!(*cmp)(first_cmp_arg, &unique_buff, ¤t_key)) + if (!(*cmp)(first_cmp_arg, &unique_buff, &src)) { if (min_dupl_count) { + uint dupl_count_ofs= rec_length - sizeof(element_count); element_count cnt; memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt)); dupl_count+= cnt; } goto skip_duplicate; } + if (min_dupl_count) { - memcpy(unique_buff+dupl_count_ofs, &dupl_count, - sizeof(dupl_count)); + DBUG_ASSERT(rec_length <= param->sort_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count)); } + res_length= rec_length - size_of_dupl_count; src= unique_buff; } + else + param->get_rec_and_res_len(src, &rec_length, &res_length); { - param->get_rec_and_res_len(buffpek->current_key(), - &rec_length, &res_length); const uint bytes_to_write= (flag == 0) ? rec_length : res_length; /* @@ -1943,10 +1957,15 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, } if (cmp) { + rec_length= param->get_record_length_for_unique(buffpek->current_key(), + size_of_dupl_count); + DBUG_ASSERT(rec_length <= param->sort_length); memcpy(unique_buff, buffpek->current_key(), rec_length); if (min_dupl_count) - memcpy(&dupl_count, unique_buff+dupl_count_ofs, - sizeof(dupl_count)); + { + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count)); + } } if (!--max_rows) { @@ -1989,6 +2008,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, { if (min_dupl_count) { + uint dupl_count_ofs= rec_length - sizeof(element_count); element_count cnt; memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt)); dupl_count+= cnt; @@ -1998,13 +2018,22 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, } if (min_dupl_count) - memcpy(unique_buff+dupl_count_ofs, &dupl_count, - sizeof(dupl_count)); + { + DBUG_ASSERT(rec_length <= param->sort_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count)); + } if (!check_dupl_count || dupl_count >= min_dupl_count) { src= unique_buff; - if (my_b_write(to_file, src+wr_offset, wr_len)) + res_length = rec_length - size_of_dupl_count; + const uint bytes_to_write= (flag == 0) ? rec_length : res_length; + if (my_b_write(to_file, + src + (offset_for_packing ? 
+ rec_length - res_length : // sort length + wr_offset), + bytes_to_write)) goto err; /* purecov: inspected */ if (!--max_rows) goto end; @@ -2022,17 +2051,28 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, for (uint ix= 0; ix < buffpek->mem_count(); ++ix) { uchar *src= buffpek->current_key(); - param->get_rec_and_res_len(src, - &rec_length, &res_length); - const uint bytes_to_write= (flag == 0) ? rec_length : res_length; - if (check_dupl_count) + if (cmp) { - memcpy((uchar *) &dupl_count, - buffpek->current_key() + offset + dupl_count_ofs, - sizeof(dupl_count)); - if (dupl_count < min_dupl_count) - continue; + rec_length= param->get_record_length_for_unique(src, + size_of_dupl_count); + res_length= rec_length - size_of_dupl_count; + if (check_dupl_count) + { + /* + TODO varun: this looks incorrect to me + */ + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(&dupl_count, src + dupl_count_ofs, sizeof(dupl_count)); + + if (dupl_count < min_dupl_count) + continue; + } } + else + param->get_rec_and_res_len(src, &rec_length, &res_length); + + const uint bytes_to_write= (flag == 0) ? rec_length : res_length; + if(my_b_write(to_file, src + (offset_for_packing ? rec_length - res_length : // sort length wr_offset), @@ -2532,6 +2572,23 @@ void Sort_param::try_to_pack_sortkeys() rec_length= sort_length + addon_length; } +/* + @brief + Return the length of the record in the Unique tree + + @param + to key value + size_of_dupl_count if min_dupl_count > 0, then the record length + needs size_of_dupl_count to store the counter +*/ +uint32 Sort_param::get_record_length_for_unique(uchar *to, + uint size_of_dupl_count) +{ + return using_packed_sortkeys() ? + Unique::read_packed_length(to) + size_of_dupl_count : + rec_length; +} + uint Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item, @@ -2742,6 +2799,45 @@ bool SORT_FIELD_ATTR::check_if_packing_possible(THD *thd) const /* + @brief + Setup the SORT_FIELD structure + + @param + fld field structure + exclude_nulls TRUE if nulls are not to be considered + with_suffix TRUE if length bytes needed to store the length + for binary charset + + @note + Currently used only by Unique object + TODO varun: we can refactor the code for filesort to use this function. + +*/ +void SORT_FIELD::setup(Field *fld, bool exclude_nulls, bool with_suffix) +{ + field= fld; + item= NULL; + /* + For Unique, reverse needs to be set to FALSE always, + but we could even pass it as an argument to the function + */ + reverse= false; + original_length= length= (with_suffix ? + field->sort_length() : + field->sort_length_without_suffix()); + + cs= field->sort_charset(); + suffix_length= with_suffix ? field->sort_suffix_length() : 0; + type= field->is_packable() ? + SORT_FIELD_ATTR::VARIABLE_SIZE : + SORT_FIELD_ATTR::FIXED_SIZE; + maybe_null= exclude_nulls ? false : field->maybe_null(); + length_bytes= is_variable_sized() ?
+ number_storage_requirement(length) : 0; +} + + +/* Compare function used for packing sort keys */ @@ -2871,16 +2967,45 @@ int compare_packed_sort_keys(void *sort_param, unsigned char **a_ptr, unsigned char **b_ptr) { int retval= 0; - size_t a_len, b_len; Sort_param *param= (Sort_param*)sort_param; Sort_keys *sort_keys= param->sort_keys; uchar *a= *a_ptr; uchar *b= *b_ptr; - a+= Sort_keys::size_of_length_field; - b+= Sort_keys::size_of_length_field; - for (SORT_FIELD *sort_field= sort_keys->begin(); - sort_field != sort_keys->end(); sort_field++) + if ((retval= sort_keys->compare_keys(a + Sort_keys::size_of_length_field, + b + Sort_keys::size_of_length_field))) + return retval; + + /* + this comparison is done for the case when the sort keys is appended with + the ROW_ID pointer. For such cases we don't have addon fields + so we can make a memcmp check over both the sort keys + */ + if (!param->using_addon_fields()) + { + a+= Sort_keys::read_sortkey_length(a); + b+= Sort_keys::read_sortkey_length(b); + retval= memcmp(a, b, param->res_length); + } + return retval; +} + + +/* + @brief + Compare two sort keys + + @retval + >0 key a greater than b + =0 key a equal to b + <0 key a less than b +*/ + +int Sort_keys::compare_keys(uchar *a, uchar *b) +{ + int retval= 0; + size_t a_len, b_len; + for (SORT_FIELD *sort_field= begin(); sort_field != end(); sort_field++) { retval= sort_field->is_variable_sized() ? sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) : @@ -2891,15 +3016,7 @@ int compare_packed_sort_keys(void *sort_param, a+= a_len; b+= b_len; - } - /* - this comparison is done for the case when the sort keys is appended with - the ROW_ID pointer. For such cases we don't have addon fields - so we can make a memcmp check over both the sort keys - */ - if (!param->using_addon_fields()) - retval= memcmp(a, b, param->res_length); return retval; } diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index b9a84775311..48075106206 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -1452,7 +1452,7 @@ append_null: } -static int append_json_value_from_field(String *str, +static bool append_json_value_from_field(String *str, Item *i, Field *f, const uchar *key, size_t offset, String *tmp_val) { if (i->type_handler()->is_bool_type()) @@ -1498,6 +1498,73 @@ append_null: } +/* + @brief + Append the value of a field in JSON format + + @param + str buffer to write the value + item JSON_ARRAYAGG item + field field whose value needs to be appended + tmp_val temp buffer + + @note + Use this function when the field has the value in its ptr. + This is currently used when packing is done for JSON_ARRAYAGG function. + In the case of packing, we have to unpack the value to Field::ptr. 
+ + @retval + FALSE value appended in JSON format + TRUE error +*/ + +static bool append_json_value_from_field(String *str, Item *item, Field *field, + String *tmp_val) +{ + if (item->type_handler()->is_bool_type()) + { + if (field->is_null()) + goto append_null; + + longlong v_int= field->val_int(); + const char *t_f; + int t_f_len; + + if (v_int) + { + t_f= "true"; + t_f_len= 4; + } + else + { + t_f= "false"; + t_f_len= 5; + } + + return str->append(t_f, t_f_len); + } + { + if (field->is_null()) + goto append_null; + String *sv= field->val_str(tmp_val); + + if (item->is_json_type()) + return str->append(sv->ptr(), sv->length()); + + if (item->result_type() == STRING_RESULT) + { + return str->append("\"", 1) || + st_append_escaped(str, sv) || + str->append("\"", 1); + } + return st_append_escaped(str, sv); + } + +append_null: + return str->append("null", 4); +} + + static int append_json_keyname(String *str, Item *item, String *tmp_val) { String *sv= item->val_str(tmp_val); @@ -3689,6 +3756,19 @@ String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f, } +String *Item_func_json_arrayagg::get_str_from_field(Item *i,Field *f, + String *tmp) +{ + m_tmp_json.length(0); + + if (append_json_value_from_field(&m_tmp_json, i, f, tmp)) + return NULL; + + return &m_tmp_json; + +} + + void Item_func_json_arrayagg::cut_max_length(String *result, uint old_length, uint max_length) const { diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h index 0408b3ad72f..16f700724bb 100644 --- a/sql/item_jsonfunc.h +++ b/sql/item_jsonfunc.h @@ -547,6 +547,7 @@ protected: String *get_str_from_item(Item *i, String *tmp); String *get_str_from_field(Item *i, Field *f, String *tmp, const uchar *key, size_t offset); + String *get_str_from_field(Item *i, Field *f, String *tmp); void cut_max_length(String *result, uint old_length, uint max_length) const; public: diff --git a/sql/item_sum.cc b/sql/item_sum.cc index e79344507b2..96b17e74019 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -701,6 +701,32 @@ int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2) } +/** + Correctly compare composite keys. + + Used by the Unique class to compare packed keys. Will do correct comparisons + for composite keys with various field types. 
+ + @param arg Pointer to the relevant Aggregator_distinct instance + @param key1 left key image + @param key2 right key image + + @return comparison result + @retval <0 if key1 < key2 + @retval =0 if key1 = key2 + @retval >0 if key1 > key2 +*/ + +int +Aggregator_distinct::composite_packed_key_cmp(void* arg, + uchar* key1, uchar* key2) +{ + Aggregator_distinct *aggr= (Aggregator_distinct *) arg; + DBUG_ASSERT(aggr->tree); + return aggr->tree->compare_packed_keys(key1, key2); +} + + /***************************************************************************/ C_MODE_START @@ -766,13 +792,19 @@ bool Aggregator_distinct::setup(THD *thd) return TRUE; /* Create a table with an unique key over all parameters */ + uint non_const_items= 0; for (uint i=0; i < item_sum->get_arg_count() ; i++) { Item *item=item_sum->get_arg(i); if (list.push_back(item, thd->mem_root)) return TRUE; // End of memory - if (item->const_item() && item->is_null()) - always_null= true; + if (item->const_item()) + { + if (item->is_null()) + always_null= true; + } + else + non_const_items++; } if (always_null) return FALSE; @@ -861,16 +893,27 @@ bool Aggregator_distinct::setup(THD *thd) } } } + + bool allow_packing= item_sum->is_packing_allowed(table, &tree_key_length); + + if (allow_packing) + { + compare_key= (qsort_cmp2) composite_packed_key_cmp; + cmp_arg= (void*)this; + } + DBUG_ASSERT(tree == 0); tree= new Unique(compare_key, cmp_arg, tree_key_length, - item_sum->ram_limitation(thd)); + item_sum->ram_limitation(thd), 0, allow_packing); /* The only time tree_key_length could be 0 is if someone does count(distinct) on a char(0) field - stupid thing to do, but this has to be handled - otherwise someone can crash the server with a DoS attack */ - if (! tree) + if (!tree || tree->setup(thd, item_sum, + non_const_items, item_sum->get_arg_count(), + TRUE)) return TRUE; } return FALSE; @@ -987,9 +1030,23 @@ bool Aggregator_distinct::add() if (copy_funcs(tmp_table_param->items_to_copy, table->in_use)) return TRUE; - for (Field **field=table->field ; *field ; field++) - if ((*field)->is_real_null(0)) - return 0; // Don't count NULL + uchar *to= NULL, *orig_to= table->record[0] + table->s->null_bytes; + uint packed_length= 0; + + if (is_distinct_packed()) + { + orig_to= to= tree->get_packed_rec_ptr(); + if ((to= make_packed_record(to)) == NULL) + return false; + packed_length= static_cast<uint>(to - orig_to); + Unique::store_packed_length(orig_to, packed_length); + } + else + { + for (Field **field=table->field ; *field ; field++) + if ((*field)->is_real_null(0)) + return 0; // Don't count NULL + } if (tree) { @@ -999,7 +1056,8 @@ bool Aggregator_distinct::add() bloat the tree without providing any valuable info. Besides, key_length used to initialize the tree didn't include space for them. */ - return tree->unique_add(table->record[0] + table->s->null_bytes); + return tree->unique_add(orig_to, tree->is_packed() ? + packed_length : tree->get_size()); } if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))) && table->file->is_fatal_error(error, HA_CHECK_DUP)) @@ -1017,7 +1075,7 @@ bool Aggregator_distinct::add() '0' values are also stored in the tree. 
This doesn't matter for SUM(DISTINCT), but is important for AVG(DISTINCT) */ - return tree->unique_add(table->field[0]->ptr); + return tree->unique_add(table->field[0]->ptr, tree->get_size()); } } @@ -1778,6 +1836,48 @@ bool Aggregator_distinct::unique_walk_function_for_count(void *element) } +/* + @brief + Checks whether the unique tree is packed or not + + @retval + TRUE tree stores packed values + FALSE otherwise +*/ + +bool Aggregator_distinct::is_distinct_packed() +{ + return tree && tree->is_packed(); + +} + + +/* + @brief + Make a record with packed values + + @param + to buffer to store the packed value +*/ +uchar *Aggregator_distinct::make_packed_record(uchar *to) +{ + to+= Unique::size_of_length_field; + + for (Field **field=table->field ; *field ; field++) + { + Field *fld= (*field); + if (fld->is_real_null(0)) + return NULL; // Don't count NULL + + /* + Storing the packed values for the non-const fields of the record + */ + to+= fld->make_packed_record_field(to); + } + return to; +} + + Aggregator_distinct::~Aggregator_distinct() { if (tree) @@ -3612,6 +3712,33 @@ int group_concat_key_cmp_with_distinct_with_nulls(void* arg, /** + Compares the packed values for fields in the expr list of GROUP_CONCAT. + @note + + GROUP_CONCAT([DISTINCT] expr [,expr ...] + [ORDER BY {unsigned_integer | col_name | expr} + [ASC | DESC] [,col_name ...]] + [SEPARATOR str_val]) + + @return + @retval -1 : key1 < key2 + @retval 0 : key1 = key2 + @retval 1 : key1 > key2 +*/ +int group_concat_packed_key_cmp_with_distinct(void *arg, + const void *a_ptr, + const void *b_ptr) +{ + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + + DBUG_ASSERT(item_func->unique_filter); + uchar *a= (uchar*)a_ptr; + uchar *b= (uchar*)b_ptr; + return item_func->unique_filter->compare_packed_keys(a, b); +} + + +/** function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,...
) */ @@ -3778,10 +3905,21 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), String tmp((char *)table->record[1], table->s->reclength, default_charset_info); String tmp2; - uchar *key= (uchar *) key_arg; + const uchar *key= (const uchar *) key_arg; + const uchar *key_end= NULL; String *result= &item->result; Item **arg= item->args, **arg_end= item->args + item->arg_count_field; uint old_length= result->length(); + SORT_FIELD *pos; + + const bool packed= item->is_distinct_packed(); + + if (packed) + { + pos= item->unique_filter->get_sortorder(); + key_end= key + item->unique_filter->get_full_size(); + key+= Unique::size_of_length_field; + } ulonglong *offset_limit= &item->copy_offset_limit; ulonglong *row_limit = &item->copy_row_limit; @@ -3822,11 +3960,43 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), Field *field= (*arg)->get_tmp_table_field(); if (field) { - uint offset= (field->offset(field->table->record[0]) - - table->s->null_bytes); - DBUG_ASSERT(offset < table->s->reclength); - res= item->get_str_from_field(*arg, field, &tmp, key, - offset + item->get_null_bytes()); + if (packed) + { + DBUG_ASSERT(pos->field == field); + if (!item->skip_nulls() && field->maybe_null()) + { + if (*key == 0) + { + // Case with NULL value + field->set_null(); + res= item->get_str_from_field(*arg, field, &tmp); + key++; + } + else + { + // Case with NOT NULL value + field->set_notnull(); + const uchar *end= field->unpack(field->ptr, key+1, key_end, 0); + res= item->get_str_from_field(*arg, field, &tmp); + key= end; + } + } + else + { + const uchar *end= field->unpack(field->ptr, key, key_end, 0); + res= item->get_str_from_field(*arg, field, &tmp); + key= end; + } + pos++; + } + else + { + uint offset= (field->offset(field->table->record[0]) - + table->s->null_bytes); + DBUG_ASSERT(offset < table->s->reclength); + res= item->get_str_from_field(*arg, field, &tmp, key, + offset + item->get_null_bytes()); + } } else res= item->get_str_from_item(*arg, &tmp); @@ -4137,6 +4307,14 @@ bool Item_func_group_concat::add(bool exclude_nulls) size_t row_str_len= 0; StringBuffer<MAX_FIELD_WIDTH> buf; String *res; + uchar *to= NULL, *orig_to= get_record_pointer(); + + if (is_distinct_packed()) + { + orig_to= to= unique_filter->get_packed_rec_ptr(); + to+= Unique::size_of_length_field; + } + for (uint i= 0; i < arg_count_field; i++) { Item *show_item= args[i]; @@ -4151,6 +4329,24 @@ bool Item_func_group_concat::add(bool exclude_nulls) return 0; // Skip row if it contains null if (tree && (res= field->val_str(&buf))) row_str_len+= res->length(); + + /* + Storing the packed values for the non-const fields of the record + */ + if (is_distinct_packed()) + { + if (!exclude_nulls && field->maybe_null()) + { + if (field->is_null_in_record((const uchar*) table->record[0])) + { + *to++=0; + continue; + } + *to++=1; + } + uchar* end= field->pack(to, field->ptr); + to+= static_cast<uint>(end - to); + } } else { @@ -4167,9 +4363,17 @@ bool Item_func_group_concat::add(bool exclude_nulls) if (distinct) { + DBUG_ASSERT(unique_filter); + uint packed_length= unique_filter->get_size(); + + if (unique_filter->is_packed()) + { + packed_length= static_cast<uint>(to - orig_to); + Unique::store_packed_length(orig_to, packed_length); + } /* Filter out duplicate rows. 
*/ uint count= unique_filter->elements_in_tree(); - unique_filter->unique_add(get_record_pointer()); + unique_filter->unique_add(get_record_pointer(), packed_length); if (count == unique_filter->elements_in_tree()) row_eligible= FALSE; } @@ -4285,16 +4489,22 @@ bool Item_func_group_concat::setup(THD *thd) /* Push all not constant fields to the list and create a temp table */ always_null= 0; + uint non_const_items= 0; for (uint i= 0; i < arg_count_field; i++) { Item *item= args[i]; if (list.push_back(item, thd->mem_root)) DBUG_RETURN(TRUE); - if (item->const_item() && item->is_null() && skip_nulls()) + if (item->const_item()) { - always_null= 1; - DBUG_RETURN(FALSE); + if (item->is_null() && skip_nulls()) + { + always_null= 1; + DBUG_RETURN(FALSE); + } } + else + non_const_items++; } List<Item> all_fields(list); @@ -4377,6 +4587,7 @@ bool Item_func_group_concat::setup(THD *thd) the row is not added to the result. */ uint tree_key_length= table->s->reclength - table->s->null_bytes; + bool allow_packing= is_packing_allowed(&tree_key_length); if (arg_count_order) { @@ -4395,10 +4606,16 @@ bool Item_func_group_concat::setup(THD *thd) } if (distinct) - unique_filter= new Unique(get_comparator_function_for_distinct(), + { + unique_filter= new Unique(get_comparator_function_for_distinct(allow_packing), (void*)this, tree_key_length + get_null_bytes(), - ram_limitation(thd)); + ram_limitation(thd), 0, allow_packing); + + if (!unique_filter || unique_filter->setup(thd, this, non_const_items, + arg_count_field, skip_nulls())) + DBUG_RETURN(TRUE); + } if ((row_limit && row_limit->cmp_type() != INT_RESULT) || (offset_limit && offset_limit->cmp_type() != INT_RESULT)) { @@ -4456,11 +4673,13 @@ String* Item_func_group_concat::val_str(String* str) Get the comparator function for DISTINT clause */ -qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct() +qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct(bool packed) { - return skip_nulls() ? - group_concat_key_cmp_with_distinct : - group_concat_key_cmp_with_distinct_with_nulls; + return packed ? + group_concat_packed_key_cmp_with_distinct : + (skip_nulls() ? + group_concat_key_cmp_with_distinct : + group_concat_key_cmp_with_distinct_with_nulls); } @@ -4488,9 +4707,11 @@ qsort_cmp2 Item_func_group_concat::get_comparator_function_for_order_by() uchar* Item_func_group_concat::get_record_pointer() { - return skip_nulls() ? - table->record[0] + table->s->null_bytes : - table->record[0]; + return is_distinct_packed() ? + unique_filter->get_packed_rec_ptr() : + (skip_nulls() ? 
+ table->record[0] + table->s->null_bytes : + table->record[0]); } @@ -4511,6 +4732,102 @@ uint Item_func_group_concat::get_null_bytes() } +/* + @brief + Checks whether the unique tree is packed or not + + @retval + TRUE tree stores packed values + FALSE otherwise +*/ + +bool Item_func_group_concat::is_distinct_packed() +{ + return unique_filter && unique_filter->is_packed(); +} + + +/* + @brief + Checks if one can store packed values in the Unique tree + + @param + total_length [OUT] length of the key in the tree + + @retval + TRUE packing allowed + FALSE packing not allowed +*/ + +bool Item_func_group_concat::is_packing_allowed(uint* total_length) +{ + /* + TODO varun: + Currently Unique is not packed if ORDER BY clause is used + This needs to be implemented when MDEV-22089 is fixed + */ + if (!distinct || arg_count_order) + return false; + + return Item_sum::is_packing_allowed(table, total_length); } + + +/* + @brief + Check if storing packed values inside the Unique tree is allowed + + @param table Table structure + @param total_length [OUT] max length of the packed key (takes into account + the length bytes also) +*/ + + +bool Item_sum::is_packing_allowed(TABLE *table, uint* total_length) +{ + uint size_of_packable_fields= 0; + uint tot_length= 0; + for (uint i= 0; i < get_arg_count(); i++) + { + Item *item= args[i]; + if (item->const_item()) + continue; + Field *field= item->get_tmp_table_field(); + if (field) + { + /* + For blob columns packing is not allowed + */ + if (field->flags & BLOB_FLAG) + return false; + + tot_length+= field->sort_length_without_suffix(); + if (field->is_packable()) + { + size_of_packable_fields+= + number_storage_requirement(field->sort_length_without_suffix()); + } + } + } + + // All fields are of fixed size + if (size_of_packable_fields == 0) + return false; + + /* + TODO varun: we can introduce a heuristic here, no need to do packing if we + have small VARCHAR or CHAR + */ + *total_length= tot_length; + /* + Unique::size_of_length_field is the length bytes to store the packed length + for each record inserted in the Unique tree + */ + (*total_length)+= Unique::size_of_length_field + size_of_packable_fields; + return true; +} + + void Item_func_group_concat::print(String *str, enum_query_type query_type) { str->append(func_name()); diff --git a/sql/item_sum.h b/sql/item_sum.h index dc520ce2578..39d5b79c1c4 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -590,6 +590,7 @@ public: bool with_sum_func() const { return true; } virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); } + bool is_packing_allowed(TABLE* table, uint* total_length); }; @@ -696,7 +697,10 @@ public: bool unique_walk_function(void *element); bool unique_walk_function_for_count(void *element); + bool is_distinct_packed(); static int composite_key_cmp(void* arg, uchar* key1, uchar* key2); + static int composite_packed_key_cmp(void* arg, uchar* key1, uchar* key2); + uchar* make_packed_record(uchar *to); }; @@ -1920,6 +1924,9 @@ protected: friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1, const void* key2); + friend int group_concat_packed_key_cmp_with_distinct(void *arg, + const void *key1, + const void *key2); friend int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); friend int group_concat_key_cmp_with_order_with_nulls(void *arg, @@ -1941,6 +1948,9 @@ protected: virtual String *get_str_from_field(Item *i, Field *f, String *tmp, const uchar *key, size_t offset) { return f->val_str(tmp, key + offset); } +
virtual String *get_str_from_field(Item *i, Field *f, String *tmp) + { return f->val_str(tmp); } + virtual void cut_max_length(String *result, uint old_length, uint max_length) const; public: @@ -2015,11 +2025,12 @@ public: { context= (Name_resolution_context *)cntx; return FALSE; } Item *get_copy(THD *thd) { return get_item_copy<Item_func_group_concat>(thd, this); } - qsort_cmp2 get_comparator_function_for_distinct(); + qsort_cmp2 get_comparator_function_for_distinct(bool packed); qsort_cmp2 get_comparator_function_for_order_by(); uchar* get_record_pointer(); uint get_null_bytes(); - + bool is_distinct_packed(); + bool is_packing_allowed(uint* total_length); }; #endif /* ITEM_SUM_INCLUDED */ diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 80d67d884bf..7d65c85129b 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -11772,7 +11772,7 @@ int read_keys_and_merge_scans(THD *thd, } DBUG_ASSERT(file->ref_length == unique->get_size()); - DBUG_ASSERT(thd->variables.sortbuff_size == unique->get_max_in_memory_size()); + DBUG_ASSERT(thd->variables.sortbuff_size <= unique->get_max_in_memory_size()); for (;;) { @@ -11815,7 +11815,7 @@ int read_keys_and_merge_scans(THD *thd, continue; cur_quick->file->position(cur_quick->record); - if (unique->unique_add((char*)cur_quick->file->ref)) + if (unique->unique_add((char*)cur_quick->file->ref, unique->get_size())) goto err; } diff --git a/sql/sql_class.h b/sql/sql_class.h index 58a786ed33c..c158d7c5a14 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -6454,6 +6454,7 @@ struct SORT_FIELD: public SORT_FIELD_ATTR Field *field; /* Field to sort */ Item *item; /* Item if not sorting fields */ bool reverse; /* if descending sort */ + void setup(Field *fld, bool exclude_nulls, bool with_suffix); }; diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 5942efcd601..db5b5b777a8 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -714,7 +714,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, { table->file->position(table->record[0]); if (unlikely((error= - deltempfile->unique_add((char*) table->file->ref)))) + deltempfile->unique_add((char*) table->file->ref, + deltempfile->get_size())))) { error= 1; goto terminate_delete; @@ -1350,7 +1351,8 @@ int multi_delete::send_data(List<Item> &values) } else { - error=tempfiles[secure_counter]->unique_add((char*) table->file->ref); + uint size= tempfiles[secure_counter]->get_size(); + error=tempfiles[secure_counter]->unique_add((char*) table->file->ref, size); if (unlikely(error)) { error= 1; // Fatal error diff --git a/sql/sql_sort.h b/sql/sql_sort.h index 40f0c5ede5f..d512a9727b6 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -310,6 +310,8 @@ public: sort_length+= len; } + int compare_keys(uchar *a, uchar *b); + static const uint size_of_length_field= 4; private: @@ -567,8 +569,8 @@ public: bool using_packed_sortkeys() const { - DBUG_ASSERT(m_using_packed_sortkeys == - (sort_keys != NULL && sort_keys->using_packed_sortkeys())); + DBUG_ASSERT(sort_keys == NULL || + (m_using_packed_sortkeys == sort_keys->using_packed_sortkeys())); return m_using_packed_sortkeys; } @@ -578,6 +580,11 @@ public: return addon_fields != NULL; } + void set_using_packed_keys(bool val) + { + m_using_packed_sortkeys= val; + } + uint32 get_result_length(uchar *plen) { if (!m_using_packed_addons) @@ -659,6 +666,12 @@ public: { return m_packed_format; } + void set_packed_format(bool val) + { + m_packed_format= val; + } + + uint32 get_record_length_for_unique(uchar *to, uint size_of_dupl_count); private: uint 
m_packable_length; diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 879955af723..bf6cebbf7e8 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1570,7 +1570,16 @@ public: return 0; if (count > bucket_capacity * (curr_bucket + 1)) { - column->store_field_value((uchar *) elem, col_length); + uchar *to= (uchar* )elem; + if (column->is_packable()) + { + column->unpack(column->ptr, + to + Unique::size_of_length_field, + to + Unique::read_packed_length(to), 0); + } + else + column->store_field_value(to, col_length); + histogram->set_value(curr_bucket, column->pos_in_interval(min_value, max_value)); curr_bucket++; @@ -1646,13 +1655,11 @@ public: of the parameters to be passed to the constructor of the Unique object. */ - Count_distinct_field(Field *field, size_t max_heap_table_size) + Count_distinct_field(Field *field) { table_field= field; - tree_key_length= field->pack_length(); - - tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, - tree_key_length, max_heap_table_size, 1); + tree_key_length= 0; + tree= NULL; } virtual ~Count_distinct_field() @@ -1670,6 +1677,48 @@ public: return (tree != NULL); } + + /* + @brief + Create and setup the Unique object for the column + + @param + thd Thread structure + max_heap_table_size max allowed size of the unique tree + */ + bool setup(THD *thd, size_t max_heap_table_size) + { + if (table_field->is_packable()) + { + tree_key_length= table_field->max_packed_col_length(table_field->pack_length()); + + tree_key_length+= Unique::size_of_length_field; + tree= new Unique((qsort_cmp2) simple_packed_str_key_cmp, (void*) this, + tree_key_length, max_heap_table_size, 1, TRUE); + } + else + { + if (table_field->type() == MYSQL_TYPE_BIT) + { + tree_key_length= sizeof(ulonglong); + tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, + (void*) &tree_key_length, + tree_key_length, max_heap_table_size, 1, FALSE); + } + else + { + tree_key_length= table_field->pack_length(); + tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) table_field, + tree_key_length, max_heap_table_size, 1, FALSE); + } + } + if (!tree) + return true; // OOM + + return tree->setup(thd, table_field); + } + + /* @brief Add the value of 'field' to the container of the Unique object 'tree' @@ -1677,7 +1726,20 @@ public: virtual bool add() { table_field->mark_unused_memory_as_defined(); - return tree->unique_add(table_field->ptr); + uchar *orig_to= table_field->ptr; + DBUG_ASSERT(tree); + + uint length= tree->get_size(); + if (tree->is_packed()) + { + uchar *to; + orig_to= to= tree->get_packed_rec_ptr(); + to+= Unique::size_of_length_field; + to+= table_field->make_packed_record_field(to); + length= static_cast<uint>(to - orig_to); + Unique::store_packed_length(orig_to, length); + } + return tree->unique_add(orig_to, length); } /* @@ -1733,16 +1795,31 @@ public: return table_field->collected_stats->histogram.get_values(); } + static int simple_packed_str_key_cmp(void* arg, uchar* key1, uchar* key2); + static int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2); + }; -static -int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2) +int Count_distinct_field::simple_ulonglong_key_cmp(void* arg, + uchar* key1, uchar* key2) { ulonglong *val1= (ulonglong *) key1; ulonglong *val2= (ulonglong *) key2; return *val1 > *val2 ? 1 : *val1 == *val2 ? 
0 : -1; } + + +/* + @brief + Compare function for packed keys +*/ +int Count_distinct_field::simple_packed_str_key_cmp(void* arg, + uchar* key1, uchar* key2) +{ + Count_distinct_field *compare_arg= (Count_distinct_field*)arg; + return compare_arg->tree->compare_packed_keys(key1, key2); +} /* @@ -1755,20 +1832,12 @@ class Count_distinct_field_bit: public Count_distinct_field { public: - Count_distinct_field_bit(Field *field, size_t max_heap_table_size) - { - table_field= field; - tree_key_length= sizeof(ulonglong); - - tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, - (void*) &tree_key_length, - tree_key_length, max_heap_table_size, 1); - } + Count_distinct_field_bit(Field *field): Count_distinct_field(field){} bool add() { longlong val= table_field->val_int(); - return tree->unique_add(&val); + return tree->unique_add(&val, tree->get_size()); } }; @@ -2344,8 +2413,11 @@ void Column_statistics_collected::init(THD *thd, Field *table_field) { count_distinct= table_field->type() == MYSQL_TYPE_BIT ? - new Count_distinct_field_bit(table_field, max_heap_table_size) : - new Count_distinct_field(table_field, max_heap_table_size); + new Count_distinct_field_bit(table_field) : + new Count_distinct_field(table_field); + + if (count_distinct && count_distinct->setup(thd, max_heap_table_size)) + count_distinct= NULL; } if (count_distinct && !count_distinct->exists()) count_distinct= NULL; diff --git a/sql/uniques.cc b/sql/uniques.cc index a0cebe3e4dd..d8772f366fb 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -48,12 +48,14 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique) when tree implementation chooses to store pointer to key in TREE_ELEMENT (instead of storing the element itself there) */ - return my_b_write(&unique->file, key, unique->size) ? 1 : 0; + return (unique->is_packed() ? + my_b_write(&unique->file, key, Unique::read_packed_length(key)) : + my_b_write(&unique->file, key, unique->size)) ? 1 : 0; } int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique) { - return my_b_write(&unique->file, key, unique->size) || + return unique_write_to_file(key, count, unique) || my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0; } @@ -79,10 +81,14 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, uint size_arg, size_t max_in_memory_size_arg, - uint min_dupl_count_arg) + uint min_dupl_count_arg, bool packed_arg) :max_in_memory_size(max_in_memory_size_arg), size(size_arg), + packed(packed_arg), + memory_used(0), packed_rec_ptr(NULL), + sortorder(NULL), sort_keys(NULL), elements(0) + { my_b_clear(&file); min_dupl_count= min_dupl_count_arg; @@ -90,18 +96,32 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, if (min_dupl_count_arg) full_size+= sizeof(element_count); with_counters= MY_TEST(min_dupl_count_arg); - init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func, + init_tree(&tree, (max_in_memory_size / 16), 0, 0, comp_func, NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); /* If the following fail's the next add will also fail */ my_init_dynamic_array(PSI_INSTRUMENT_ME, &file_ptrs, sizeof(Merge_chunk), 16, 16, MYF(MY_THREAD_SPECIFIC)); + + if (packed) + { + packed_rec_ptr= (uchar *)my_malloc(PSI_INSTRUMENT_ME, + size, + MYF(MY_WME | MY_THREAD_SPECIFIC)); + } /* If you change the following, change it in get_max_elements function, too. 
*/ max_elements= (ulong) (max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+size)); if (!max_elements) + { max_elements= 1; + /* + Need to ensure that we have memory to store at least one record + in the Unique tree + */ + max_in_memory_size= sizeof(TREE_ELEMENT) + size; + } (void) open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE, MYF(MY_WME)); @@ -371,6 +391,7 @@ Unique::~Unique() close_cached_file(&file); delete_tree(&tree, 0); delete_dynamic(&file_ptrs); + my_free(packed_rec_ptr); } @@ -389,6 +410,10 @@ bool Unique::flush() (void*) this, left_root_right) || insert_dynamic(&file_ptrs, (uchar*) &file_ptr)) return 1; + /** + the tree gets reset so make sure the memory used is reset too + */ + memory_used= 0; delete_tree(&tree, 0); return 0; } @@ -495,7 +520,8 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, uint key_length, Merge_chunk *begin, Merge_chunk *end, tree_walk_action walk_action, void *walk_action_arg, qsort_cmp2 compare, void *compare_arg, - IO_CACHE *file, bool with_counters) + IO_CACHE *file, bool with_counters, + uint min_dupl_count, bool packed) { BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; QUEUE queue; @@ -521,7 +547,12 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, // read_to_buffer() needs only rec_length. Sort_param sort_param; sort_param.rec_length= key_length; + sort_param.sort_length= key_length; + sort_param.min_dupl_count= min_dupl_count; + DBUG_ASSERT(sort_param.res_length == 0); DBUG_ASSERT(!sort_param.using_addon_fields()); + sort_param.set_using_packed_keys(packed); + uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0; /* Invariant: queue must contain top element from each tree, until a tree @@ -534,7 +565,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, top->set_buffer(merge_buffer + (top - begin) * piece_size, merge_buffer + (top - begin) * piece_size + piece_size); top->set_max_keys(max_key_count_per_piece); - bytes_read= read_to_buffer(file, top, &sort_param, false); + bytes_read= read_to_buffer(file, top, &sort_param, packed); if (unlikely(bytes_read == (ulong) -1)) goto end; DBUG_ASSERT(bytes_read); @@ -555,6 +586,10 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, read next key from the cache or from the file and push it to the queue; this gives new top. */ + key_length= sort_param.get_record_length_for_unique((uchar*)old_key, + size_of_dupl_count); + + cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); top->advance_current_key(key_length); top->decrement_mem_count(); if (top->mem_count()) @@ -564,7 +599,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, /* save old_key not to overwrite it in read_to_buffer */ memcpy(save_key_buff, old_key, key_length); old_key= save_key_buff; - bytes_read= read_to_buffer(file, top, &sort_param, false); + bytes_read= read_to_buffer(file, top, &sort_param, packed); if (unlikely(bytes_read == (ulong) -1)) goto end; else if (bytes_read) /* top->key, top->mem_count are reset */ @@ -604,15 +639,18 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, { do { - + key_length= sort_param.get_record_length_for_unique(top->current_key(), + size_of_dupl_count); + cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); cnt= with_counters ?
get_counter_from_merged_element(top->current_key(), cnt_ofs) : 1; if (walk_action(top->current_key(), cnt, walk_action_arg)) goto end; + top->advance_current_key(key_length); } while (top->decrement_mem_count()); - bytes_read= read_to_buffer(file, top, &sort_param, false); + bytes_read= read_to_buffer(file, top, &sort_param, packed); if (unlikely(bytes_read == (ulong) -1)) goto end; } @@ -678,7 +716,8 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) (Merge_chunk *) file_ptrs.buffer, (Merge_chunk *) file_ptrs.buffer + file_ptrs.elements, action, walk_action_arg, - tree.compare, tree.custom_arg, &file, with_counters); + tree.compare, tree.custom_arg, &file, with_counters, + min_dupl_count, is_packed()); } my_free(merge_buffer); return res; @@ -730,9 +769,11 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, sort_param.max_keys_per_buffer= (uint) MY_MAX((max_in_memory_size / sort_param.sort_length), MERGEBUFF2); sort_param.not_killable= 1; + sort_param.set_using_packed_keys(is_packed()); + sort_param.set_packed_format(is_packed()); sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer * - sort_param.sort_length); + sort_param.sort_length); sort_param.compare= (qsort2_cmp) buffpek_compare; sort_param.cmp_context.key_compare= tree.compare; @@ -760,7 +801,8 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, file_ptrs.elements= maxbuffer+1; return 0; } - if (merge_index(&sort_param, Bounds_checked_array<uchar>(buff, buff_size), + if (merge_index(&sort_param, + Bounds_checked_array<uchar>(buff, buff_size), file_ptr, maxbuffer, &file, outfile)) goto err; error= 0; @@ -789,6 +831,8 @@ bool Unique::get(TABLE *table) sort.return_rows= elements+tree.elements_in_tree; DBUG_ENTER("Unique::get"); + DBUG_ASSERT(packed == FALSE); + if (my_b_tell(&file) == 0) { /* Whole tree is in memory; Don't use disk if you don't need to */ @@ -831,3 +875,114 @@ err: my_free(sort_buffer); DBUG_RETURN(rc); } + + +/* + @brief + Setup the structures that are used when Unique stores packed values + + @param thd thread structure + @param item item of aggregate function + @param non_const_args number of non constant arguments + @param arg_count total number of arguments + @param exclude_nulls TRUE: NULLs should be excluded + FALSE: otherwise + + @note + This implementation is used by GROUP_CONCAT as it can have more than + one argument in the argument list.
+ + @retval + TRUE error + FALSE setup successful +*/ + +bool +Unique::setup(THD *thd, Item_sum *item, uint non_const_args, + uint arg_count, bool exclude_nulls) +{ + if (!packed) // no packing so don't create the sortorder list + return false; + SORT_FIELD *sort,*pos; + if (sortorder) + return false; + DBUG_ASSERT(sort_keys == NULL); + sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * non_const_args); + pos= sort= sortorder; + if (!pos) + return true; + sort_keys= new Sort_keys(sortorder, non_const_args); + if (!sort_keys) + return true; + sort=pos= sortorder; + for (uint i= 0; i < arg_count; i++) + { + Item *arg= item->get_arg(i); + if (arg->const_item()) + continue; + Field *field= arg->get_tmp_table_field(); + if (!field) + continue; + + pos->setup(field, exclude_nulls, false); + pos++; + } + return false; +} + + +/* + @brief + Setup the structures that are used when Unique stores packed values + + @param thd thread structure + @param field field structure + + @retval + TRUE error + FALSE setup successful +*/ + +bool Unique::setup(THD *thd, Field *field) +{ + if (!packed) // no packing so don't create the sortorder list + return false; + + SORT_FIELD *sort,*pos; + if (sortorder) + return false; + + DBUG_ASSERT(sort_keys == NULL); + sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD)); + pos= sort= sortorder; + if (!pos) + return true; + sort_keys= new Sort_keys(sortorder, 1); + if (!sort_keys) + return true; + sort=pos= sortorder; + pos->setup(field, true, false); // Nulls are always excluded + + return false; +} + + +/* + @brief + Compare two packed keys inside the Unique tree + + @param a_ptr packed sort key + @param b_ptr packed sort key + + @retval + >0 key a_ptr greater than b_ptr + =0 key a_ptr equal to b_ptr + <0 key a_ptr less than b_ptr + +*/ + +int Unique::compare_packed_keys(uchar *a_ptr, uchar *b_ptr) +{ + return sort_keys->compare_keys(a_ptr + Unique::size_of_length_field, + b_ptr + Unique::size_of_length_field); +} diff --git a/sql/uniques.h b/sql/uniques.h index 7e12a391fbd..1ade09408bd 100644 --- a/sql/uniques.h +++ b/sql/uniques.h @@ -44,26 +44,86 @@ class Unique :public Sql_alloc it to be written to record_pointers. 
always 0 for unions, > 0 for intersections */ bool with_counters; + /* + TRUE : Value stored is of variable size + FALSE : Value stored is of fixed size + */ + bool packed; + /* + size in bytes used for storing keys in the Unique tree + */ + size_t memory_used; + /* + Packed record ptr for a record of the table; the packed value in this + record is added to the unique tree + */ + uchar* packed_rec_ptr; + + /* + Array of SORT_FIELD structures storing the information about the key parts + in the sort key of the Unique tree + @see Unique::setup() + */ + SORT_FIELD *sortorder; + + /* + Structure storing information about usage of keys + */ + Sort_keys *sort_keys; bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge); bool flush(); + // return the amount of unused memory in the Unique tree + size_t space_left() + { + DBUG_ASSERT(max_in_memory_size >= memory_used); + return max_in_memory_size - memory_used; + } + + // Check if the Unique tree is full or not + bool is_full(size_t record_size) + { + if (!tree.elements_in_tree) // Always insert at least one element in the tree + return false; + return record_size > space_left(); + } + public: ulong elements; SORT_INFO sort; Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, - uint size_arg, size_t max_in_memory_size_arg, - uint min_dupl_count_arg= 0); + uint size_arg, size_t max_in_memory_size_arg, + uint min_dupl_count_arg= 0, bool packed_arg= false); ~Unique(); ulong elements_in_tree() { return tree.elements_in_tree; } - inline bool unique_add(void *ptr) + + /* + TODO varun: + remove the unique_add function above and pass the size as a parameter. + So for fixed types we would pass the fixed size as argument. + */ + + inline bool unique_add(void *ptr, uint size_arg) { DBUG_ENTER("unique_add"); DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements)); - if (!(tree.flag & TREE_ONLY_DUPS) && - tree.elements_in_tree >= max_elements && flush()) + TREE_ELEMENT *res; + size_t rec_size= size_arg + sizeof(TREE_ELEMENT) + tree.size_of_element; + + if (!(tree.flag & TREE_ONLY_DUPS) && is_full(rec_size) && flush()) DBUG_RETURN(1); - DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); + uint count= tree.elements_in_tree; + res= tree_insert(&tree, ptr, size_arg, tree.custom_arg); + if (tree.elements_in_tree != count) + { + /* + increment memory used only when a unique element is inserted + in the tree + */ + memory_used+= rec_size; + } + DBUG_RETURN(!res); } bool is_in_memory() { return (my_b_tell(&file) == 0); } @@ -96,7 +156,42 @@ public: bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); uint get_size() const { return size; } + uint get_full_size() const { return full_size; } size_t get_max_in_memory_size() const { return max_in_memory_size; } + uchar *get_packed_rec_ptr() { return packed_rec_ptr; } + Sort_keys *get_keys() { return sort_keys; } + SORT_FIELD *get_sortorder() { return sortorder; } + bool is_count_stored() { return with_counters; } + + // returns TRUE if the unique tree stores packed values + bool is_packed() { return packed; } + + static void store_packed_length(uchar *p, uint sz) + { + int4store(p, sz - size_of_length_field); + } + + // returns the length of the key along with the length bytes for the key + static uint read_packed_length(uchar *p) + { + return size_of_length_field + uint4korr(p); + } + + bool setup(THD *thd, Item_sum *item, uint non_const_args, + uint arg_count, bool exclude_nulls); + bool setup(THD *thd, Field *field); + int compare_packed_keys(uchar *a, uchar
*b); + /* + TODO varun: + Currently the size_of_length_field is set to 4 but we can also use 2 here, + as fields are always inserted in the unique tree either from the base table + or from the temp table. + This does not look possible, however, as the merge process + in merge buffers expects the length of dynamic sort keys to be stored in + 4 bytes; maybe this needs to be discussed + */ + static const uint size_of_length_field= 4; friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);
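To make the producer side of this layout concrete: Item_func_group_concat::add() and Aggregator_distinct::make_packed_record() build the tree key by walking the non-constant fields and, when NULLs are kept, writing a one-byte flag per nullable field (0 for NULL, 1 for a value) before the packed bytes. A simplified sketch follows; ColumnValue and the one-byte length are illustrative stand-ins for Field::pack()'s real, type-specific encoding.

```cpp
#include <cassert>
#include <optional>
#include <string>
#include <vector>

// Hypothetical stand-in for one nullable VARCHAR-like column value.
using ColumnValue= std::optional<std::string>;

// Append one column: an optional NULL flag, then the packed bytes.
// Values are assumed shorter than 256 bytes so one length byte suffices.
static void pack_column(std::vector<unsigned char> &rec,
                        const ColumnValue &v, bool maybe_null)
{
  if (maybe_null)
  {
    if (!v)
    {
      rec.push_back(0);                 // NULL: flag only, no payload
      return;
    }
    rec.push_back(1);                   // NOT NULL: flag, then value
  }
  rec.push_back((unsigned char) v->size());      // length byte
  rec.insert(rec.end(), v->begin(), v->end());   // value bytes
}

int main()
{
  std::vector<unsigned char> rec(4, 0);  // room for the 4-byte length prefix
  pack_column(rec, std::string("abc"), /*maybe_null=*/ true);
  pack_column(rec, std::nullopt,       /*maybe_null=*/ true);
  // prefix(4) + flag(1) + length(1) + "abc"(3) + NULL flag(1)
  assert(rec.size() == 10);
  return 0;
}
```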
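Duplicate detection then comes down to comparing two packed keys part by part, which is what Sort_keys::compare_keys() does and what Unique::compare_packed_keys() wraps after skipping the length prefix. The sketch below keeps only the control flow; real key parts carry field-specific length bytes and are compared with collation-aware routines such as compare_packed_varstrings(), not raw memcmp.

```cpp
#include <cstring>

// Walk the parts of two packed keys, stopping at the first difference.
// Each part is modeled as a 1-byte length followed by that many bytes.
static int compare_packed_keys(const unsigned char *a,
                               const unsigned char *b, unsigned parts)
{
  for (unsigned i= 0; i < parts; i++)
  {
    unsigned a_len= *a++, b_len= *b++;
    unsigned common= a_len < b_len ? a_len : b_len;
    int res= std::memcmp(a, b, common);
    if (res)
      return res;
    if (a_len != b_len)            // equal prefix: shorter part sorts first
      return a_len < b_len ? -1 : 1;
    a+= a_len;                     // parts equal, advance to the next one
    b+= b_len;
  }
  return 0;
}

int main()
{
  const unsigned char k1[]= { 3,'a','b','c', 2,'x','y' };
  const unsigned char k2[]= { 3,'a','b','c', 3,'x','y','z' };
  return compare_packed_keys(k1, k2, 2) < 0 ? 0 : 1;   // k1 sorts first
}
```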
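For intersection-style merges (min_dupl_count > 0) an element_count counter is appended after the key, so once keys become variable-size the patched merge_buffers() has to recompute its offset as rec_length - sizeof(element_count) per record instead of using one fixed dupl_count_ofs. Below is a small model of how equal keys accumulate their counts during a merge; element_count is assumed to be a 32-bit counter here purely for illustration.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

using element_count= uint32_t;  // illustrative; the server typedefs its own

// The counter always sits in the last sizeof(element_count) bytes.
static element_count get_count(const unsigned char *rec, unsigned rec_length)
{
  element_count cnt;
  std::memcpy(&cnt, rec + rec_length - sizeof(element_count), sizeof(cnt));
  return cnt;
}

// When two equal keys meet during the merge, their counts are summed.
static void add_count(unsigned char *rec, unsigned rec_length,
                      element_count extra)
{
  element_count cnt= get_count(rec, rec_length) + extra;
  std::memcpy(rec + rec_length - sizeof(element_count), &cnt, sizeof(cnt));
}

int main()
{
  unsigned char rec[16]= {0};       // 12 key bytes + 4 counter bytes
  add_count(rec, sizeof(rec), 2);   // merge a chunk holding the key twice
  add_count(rec, sizeof(rec), 1);   // and another holding it once
  assert(get_count(rec, sizeof(rec)) == 3);
  return 0;
}
```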
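Finally, variable-size keys mean the tree can no longer flush after a fixed max_elements. The new Unique::unique_add() instead charges each inserted key's size plus per-node overhead against max_in_memory_size, flushing when the next record would not fit while always admitting at least one element. A toy model of that policy; std::set stands in for the in-memory red-black TREE, and the flat 24-byte overhead is an arbitrary placeholder for sizeof(TREE_ELEMENT) plus tree.size_of_element.

```cpp
#include <cstddef>
#include <set>
#include <string>

struct ToyUnique
{
  std::set<std::string> tree;     // stands in for the in-memory TREE
  size_t memory_used= 0;
  size_t max_in_memory_size;
  size_t flushes= 0;

  explicit ToyUnique(size_t budget) : max_in_memory_size(budget) {}

  // Full only if the record doesn't fit AND we already hold something,
  // mirroring is_full()'s "always insert at least one element" rule.
  bool is_full(size_t rec_size) const
  { return !tree.empty() && memory_used + rec_size > max_in_memory_size; }

  void flush()   // the real code writes the sorted tree to an IO_CACHE
  { tree.clear(); memory_used= 0; ++flushes; }

  void add(const std::string &key)
  {
    size_t rec_size= key.size() + 24;   // payload + per-node overhead
    if (is_full(rec_size))
      flush();
    if (tree.insert(key).second)        // charge only new (unique) keys
      memory_used+= rec_size;
  }
};

int main()
{
  ToyUnique u(100);
  u.add("alpha"); u.add("beta");
  u.add("alpha");                 // duplicate: no extra memory charged
  u.add("gamma");                 // 86 of 100 bytes used
  u.add("delta");                 // would need 115 -> flush, then insert
  return u.flushes == 1 ? 0 : 1;
}
```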