diff options
Diffstat (limited to 'sql/uniques.cc')
-rw-r--r-- | sql/uniques.cc | 106 |
1 files changed, 79 insertions, 27 deletions
diff --git a/sql/uniques.cc b/sql/uniques.cc index f5ab50c7c90..21aa66ec64d 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -48,6 +48,12 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique) return my_b_write(&unique->file, key, unique->size) ? 1 : 0; } +int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique) +{ + return my_b_write(&unique->file, key, unique->size) || + my_b_write(&unique->file, &count, sizeof(element_count)) ? 1 : 0; +} + int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) { memcpy(unique->record_pointers, key, unique->size); @@ -55,10 +61,28 @@ int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) return 0; } +int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique) +{ + if (count >= unique->min_dupl_count) + { + memcpy(unique->record_pointers, key, unique->size); + unique->record_pointers+=unique->size; + } + else + unique->filtered_out_elems++; + return 0; +} + + Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, - uint size_arg, ulonglong max_in_memory_size_arg) + uint size_arg, ulonglong max_in_memory_size_arg, + uint min_dupl_count_arg) :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0) { + min_dupl_count= min_dupl_count_arg; + full_size= size; + if (min_dupl_count_arg) + full_size+= sizeof(element_count); my_b_clear(&file); init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0, NULL, comp_func_fixed_arg); @@ -126,7 +150,8 @@ inline double log2_n_fact(double x) */ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, - uint *first, uint *last) + uint *first, uint *last, + uint compare_factor) { uint total_buf_elems= 0; for (uint *pbuf= first; pbuf <= last; pbuf++) @@ -137,7 +162,7 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, /* Using log2(n)=log(n)/log(2) formula */ return 2*((double)total_buf_elems*elem_size) / IO_SIZE + - total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2); + total_buf_elems*log((double) n_buffers) / (compare_factor * M_LN2); } @@ -170,7 +195,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, static double get_merge_many_buffs_cost(uint *buffer, uint maxbuffer, uint max_n_elems, - uint last_n_elems, int elem_size) + uint last_n_elems, int elem_size, + uint compare_factor) { register int i; double total_cost= 0.0; @@ -197,19 +223,22 @@ static double get_merge_many_buffs_cost(uint *buffer, { total_cost+=get_merge_buffers_cost(buff_elems, elem_size, buff_elems + i, - buff_elems + i + MERGEBUFF-1); + buff_elems + i + MERGEBUFF-1, + compare_factor); lastbuff++; } total_cost+=get_merge_buffers_cost(buff_elems, elem_size, buff_elems + i, - buff_elems + maxbuffer); + buff_elems + maxbuffer, + compare_factor); maxbuffer= lastbuff; } } /* Simulate final merge_buff call. */ total_cost += get_merge_buffers_cost(buff_elems, elem_size, - buff_elems, buff_elems + maxbuffer); + buff_elems, buff_elems + maxbuffer, + compare_factor); return total_cost; } @@ -224,7 +253,11 @@ static double get_merge_many_buffs_cost(uint *buffer, to get # bytes needed. nkeys #of elements in Unique key_size size of each elements in bytes - max_in_memory_size amount of memory Unique will be allowed to use + max_in_memory_size amount of memory Unique will be allowed to use + compare_factor used to calculate cost of one comparison + write_fl if the result must be saved written to disk + in_memory_elems OUT estimate of the number of elements in memory + if disk is not used RETURN Cost in disk seeks. @@ -261,15 +294,17 @@ static double get_merge_many_buffs_cost(uint *buffer, these will be random seeks. */ -double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, - ulonglong max_in_memory_size) +double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, + ulonglong max_in_memory_size, + uint compare_factor, + bool intersect_fl, bool *in_memory) { - ulong max_elements_in_tree; - ulong last_tree_elems; + size_t max_elements_in_tree; + size_t last_tree_elems; int n_full_trees; /* number of trees in unique - 1 */ double result; - max_elements_in_tree= ((ulong) max_in_memory_size / + max_elements_in_tree= ((size_t) max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size)); n_full_trees= nkeys / max_elements_in_tree; @@ -279,11 +314,15 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, result= 2*log2_n_fact(last_tree_elems + 1.0); if (n_full_trees) result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0); - result /= TIME_FOR_COMPARE_ROWID; + result /= compare_factor; - DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys, - n_full_trees, n_full_trees?max_elements_in_tree:0, - last_tree_elems)); + DBUG_PRINT("info",("unique trees sizes: %u=%u*%u + %u", (uint)nkeys, + (uint)n_full_trees, + (uint)(n_full_trees?max_elements_in_tree:0), + (uint)last_tree_elems)); + + if (in_memory) + *in_memory= !n_full_trees; if (!n_full_trees) return result; @@ -298,12 +337,12 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE); /* Cost of merge */ + if (intersect_fl) + key_size+= sizeof(element_count); double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees, max_elements_in_tree, - last_tree_elems, key_size); - if (merge_cost < 0.0) - return merge_cost; - + last_tree_elems, key_size, + compare_factor); result += merge_cost; /* Add cost of reading the resulting sequence, assuming there were no @@ -330,7 +369,10 @@ bool Unique::flush() file_ptr.count=tree.elements_in_tree; file_ptr.file_pos=my_b_tell(&file); - if (tree_walk(&tree, (tree_walk_action) unique_write_to_file, + tree_walk_action action= min_dupl_count ? + (tree_walk_action) unique_write_to_file_with_count : + (tree_walk_action) unique_write_to_file; + if (tree_walk(&tree, action, (void*) this, left_root_right) || insert_dynamic(&file_ptrs, (uchar*) &file_ptr)) return 1; @@ -360,6 +402,7 @@ Unique::reset() reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1); } elements= 0; + tree.flag= 0; } /* @@ -579,15 +622,19 @@ bool Unique::get(TABLE *table) { SORTPARAM sort_param; table->sort.found_records=elements+tree.elements_in_tree; - if (my_b_tell(&file) == 0) { /* Whole tree is in memory; Don't use disk if you don't need to */ if ((record_pointers=table->sort.record_pointers= (uchar*) my_malloc(size * tree.elements_in_tree, MYF(0)))) { - (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs, + tree_walk_action action= min_dupl_count ? + (tree_walk_action) unique_intersect_write_to_ptrs : + (tree_walk_action) unique_write_to_ptrs; + filtered_out_elems= 0; + (void) tree_walk(&tree, action, this, left_root_right); + table->sort.found_records-= filtered_out_elems; return 0; } } @@ -617,7 +664,9 @@ bool Unique::get(TABLE *table) sort_param.max_rows= elements; sort_param.sort_form=table; sort_param.rec_length= sort_param.sort_length= sort_param.ref_length= - size; + full_size; + sort_param.min_dupl_count= min_dupl_count; + sort_param.res_length= 0; sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length); sort_param.not_killable=1; @@ -638,8 +687,9 @@ bool Unique::get(TABLE *table) if (flush_io_cache(&file) || reinit_io_cache(&file,READ_CACHE,0L,0,0)) goto err; - if (merge_buffers(&sort_param, &file, outfile, sort_buffer, file_ptr, - file_ptr, file_ptr+maxbuffer,0)) + sort_param.res_length= sort_param.rec_length- + (min_dupl_count ? sizeof(min_dupl_count) : 0); + if (merge_index(&sort_param, sort_buffer, file_ptr, maxbuffer, &file, outfile)) goto err; error=0; err: @@ -654,3 +704,5 @@ err: outfile->end_of_file=save_pos; return error; } + + |