diff options
Diffstat (limited to 'sql/filesort.cc')
-rw-r--r-- | sql/filesort.cc | 195 |
1 files changed, 156 insertions, 39 deletions
diff --git a/sql/filesort.cc b/sql/filesort.cc index ac43c96b0e0..e0389bb0343 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -35,6 +35,7 @@ #include "filesort_utils.h" #include "sql_select.h" #include "debug_sync.h" +#include "uniques.h" /* functions defined in this file */ @@ -1694,6 +1695,8 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, uint size_of_sort_length= param->using_packed_sortkeys() ? Sort_keys::size_of_length_field : 0; + uint size_of_dupl_count= param->min_dupl_count ? + sizeof(element_count) : 0; for (; ix < count; ++ix) { @@ -1709,14 +1712,16 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, buffpek->buffer_end()) break; // Incomplete record. - uchar *plen= record + sort_length; + uchar *plen= record + sort_length + size_of_dupl_count; + uint res_length= param->get_result_length(plen); if (plen + res_length > buffpek->buffer_end()) break; // Incomplete record. - DBUG_ASSERT(res_length > 0); + DBUG_ASSERT(!param->sort_keys || res_length > 0); DBUG_ASSERT(sort_length + res_length <= param->rec_length); record+= sort_length; record+= res_length; + record+= size_of_dupl_count; } DBUG_ASSERT(ix > 0); count= ix; @@ -1812,12 +1817,12 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, rec_length= param->rec_length; res_length= param->res_length; sort_length= param->sort_length; - uint dupl_count_ofs= rec_length-sizeof(element_count); uint min_dupl_count= param->min_dupl_count; + uint size_of_dupl_count= min_dupl_count ? sizeof(element_count) : 0; + bool check_dupl_count= flag && min_dupl_count; offset= (rec_length- (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length); - uint wr_len= flag ? res_length : rec_length; uint wr_offset= flag ? offset : 0; const bool using_packed_sortkeys= param->using_packed_sortkeys(); @@ -1867,10 +1872,16 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, Store it also in 'to_file'. 
*/ buffpek= (Merge_chunk*) queue_top(&queue); + rec_length= param->get_record_length_for_unique(buffpek->current_key(), + size_of_dupl_count); + + DBUG_ASSERT(rec_length <= param->sort_length); + memcpy(unique_buff, buffpek->current_key(), rec_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); if (min_dupl_count) - memcpy(&dupl_count, unique_buff+dupl_count_ofs, - sizeof(dupl_count)); + memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count)); + buffpek->advance_current_key(rec_length); buffpek->decrement_mem_count(); if (buffpek->mem_count() == 0) @@ -1900,28 +1911,31 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, src= buffpek->current_key(); if (cmp) // Remove duplicates { - uchar *current_key= buffpek->current_key(); - if (!(*cmp)(first_cmp_arg, &unique_buff, &current_key)) + if (!(*cmp)(first_cmp_arg, &unique_buff, &src)) { if (min_dupl_count) { + uint dupl_count_ofs= rec_length - sizeof(element_count); element_count cnt; memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt)); dupl_count+= cnt; } goto skip_duplicate; } + if (min_dupl_count) { - memcpy(unique_buff+dupl_count_ofs, &dupl_count, - sizeof(dupl_count)); + DBUG_ASSERT(rec_length <= param->sort_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count)); } + res_length= rec_length - size_of_dupl_count; src= unique_buff; } + else + param->get_rec_and_res_len(src, &rec_length, &res_length); { - param->get_rec_and_res_len(buffpek->current_key(), - &rec_length, &res_length); const uint bytes_to_write= (flag == 0) ? 
rec_length : res_length; /* @@ -1943,10 +1957,15 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, } if (cmp) { + rec_length= param->get_record_length_for_unique(buffpek->current_key(), + size_of_dupl_count); + DBUG_ASSERT(rec_length <= param->sort_length); memcpy(unique_buff, buffpek->current_key(), rec_length); if (min_dupl_count) - memcpy(&dupl_count, unique_buff+dupl_count_ofs, - sizeof(dupl_count)); + { + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(&dupl_count, unique_buff + dupl_count_ofs, sizeof(dupl_count)); + } } if (!--max_rows) { @@ -1989,6 +2008,7 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, { if (min_dupl_count) { + uint dupl_count_ofs= rec_length - sizeof(element_count); element_count cnt; memcpy(&cnt, buffpek->current_key() + dupl_count_ofs, sizeof(cnt)); dupl_count+= cnt; @@ -1998,13 +2018,22 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, } if (min_dupl_count) - memcpy(unique_buff+dupl_count_ofs, &dupl_count, - sizeof(dupl_count)); + { + DBUG_ASSERT(rec_length <= param->sort_length); + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(unique_buff + dupl_count_ofs, &dupl_count, sizeof(dupl_count)); + } if (!check_dupl_count || dupl_count >= min_dupl_count) { src= unique_buff; - if (my_b_write(to_file, src+wr_offset, wr_len)) + res_length = rec_length - size_of_dupl_count; + const uint bytes_to_write= (flag == 0) ? rec_length : res_length; + if (my_b_write(to_file, + src + (offset_for_packing ? + rec_length - res_length : // sort length + wr_offset), + bytes_to_write)) goto err; /* purecov: inspected */ if (!--max_rows) goto end; @@ -2022,17 +2051,28 @@ bool merge_buffers(Sort_param *param, IO_CACHE *from_file, for (uint ix= 0; ix < buffpek->mem_count(); ++ix) { uchar *src= buffpek->current_key(); - param->get_rec_and_res_len(src, - &rec_length, &res_length); - const uint bytes_to_write= (flag == 0) ? 
rec_length : res_length; - if (check_dupl_count) + if (cmp) { - memcpy((uchar *) &dupl_count, - buffpek->current_key() + offset + dupl_count_ofs, - sizeof(dupl_count)); - if (dupl_count < min_dupl_count) - continue; + rec_length= param->get_record_length_for_unique(src, + size_of_dupl_count); + res_length= rec_length - size_of_dupl_count; + if (check_dupl_count) + { + /* + TODO varun: this looks incorrect to me + */ + uint dupl_count_ofs= rec_length - sizeof(element_count); + memcpy(&dupl_count, src + dupl_count_ofs, sizeof(dupl_count)); + + if (dupl_count < min_dupl_count) + continue; + } } + else + param->get_rec_and_res_len(src, &rec_length, &res_length); + + const uint bytes_to_write= (flag == 0) ? rec_length : res_length; + if(my_b_write(to_file, src + (offset_for_packing ? rec_length - res_length : // sort length @@ -2532,6 +2572,23 @@ void Sort_param::try_to_pack_sortkeys() rec_length= sort_length + addon_length; } +/* + @brief + Return the length of the record in the Unique tree + + @param + to key value + size_of_dupl_count if min_dupl_count > 0, then the record length + needs size_of_dupl_count to store the counter +*/ +uint32 Sort_param::get_record_length_for_unique(uchar *to, + uint size_of_dupl_count) +{ + return using_packed_sortkeys() ? + Unique::read_packed_length(to) + size_of_dupl_count : + rec_length; +} + uint Type_handler_string_result::make_packed_sort_key_part(uchar *to, Item *item, @@ -2742,6 +2799,45 @@ bool SORT_FIELD_ATTR::check_if_packing_possible(THD *thd) const /* + @brief + Setup the SORT_FIELD structure + + @param + fld field structure + exclude_nulls TRUE if nulls are not to be considered + with_suffix TRUE if length bytes needed to store the length + for binary charset + + @note + Currently used only by Unique object + TODD varun: we can refactor the code for filesort to use this function. 
+ +*/ +void SORT_FIELD::setup(Field *fld, bool exclude_nulls, bool with_suffix) +{ + field= fld; + item= NULL; + /* + For unique needs to be set to FALSE always + but we can even pass the reverse as an argument to the function + */ + reverse= false; + original_length= length= (with_suffix ? + field->sort_length() : + field->sort_length_without_suffix()); + + cs= field->sort_charset(); + suffix_length= with_suffix ? field->sort_suffix_length() : 0; + type= field->is_packable() ? + SORT_FIELD_ATTR::VARIABLE_SIZE : + SORT_FIELD_ATTR::FIXED_SIZE; + maybe_null= exclude_nulls ? false : field->maybe_null(); + length_bytes= is_variable_sized() ? + number_storage_requirement(length) : 0; +} + + +/* Compare function used for packing sort keys */ @@ -2871,16 +2967,45 @@ int compare_packed_sort_keys(void *sort_param, unsigned char **a_ptr, unsigned char **b_ptr) { int retval= 0; - size_t a_len, b_len; Sort_param *param= (Sort_param*)sort_param; Sort_keys *sort_keys= param->sort_keys; uchar *a= *a_ptr; uchar *b= *b_ptr; - a+= Sort_keys::size_of_length_field; - b+= Sort_keys::size_of_length_field; - for (SORT_FIELD *sort_field= sort_keys->begin(); - sort_field != sort_keys->end(); sort_field++) + if ((retval= sort_keys->compare_keys(a + Sort_keys::size_of_length_field, + b + Sort_keys::size_of_length_field))) + return retval; + + /* + this comparison is done for the case when the sort keys is appended with + the ROW_ID pointer. 
For such cases we don't have addon fields + so we can make a memcmp check over both the sort keys + */ + if (!param->using_addon_fields()) + { + a+= Sort_keys::read_sortkey_length(a); + b+= Sort_keys::read_sortkey_length(b); + retval= memcmp(a, b, param->res_length); + } + return retval; +} + + +/* + @brief + Compare two sort keys + + @retval + >0 key a greater than b + =0 key a equal to b + <0 key a less than b +*/ + +int Sort_keys::compare_keys(uchar *a, uchar *b) +{ + int retval= 0; + size_t a_len, b_len; + for (SORT_FIELD *sort_field= begin(); sort_field != end(); sort_field++) { retval= sort_field->is_variable_sized() ? sort_field->compare_packed_varstrings(a, &a_len, b, &b_len) : @@ -2891,15 +3016,7 @@ int compare_packed_sort_keys(void *sort_param, a+= a_len; b+= b_len; - } - /* - this comparison is done for the case when the sort keys is appended with - the ROW_ID pointer. For such cases we don't have addon fields - so we can make a memcmp check over both the sort keys - */ - if (!param->using_addon_fields()) - retval= memcmp(a, b, param->res_length); return retval; } |