5.5 merge

author: Sergei Golubchik <sergii@pisem.net> 2013-01-29 15:10:47 +0100
committer: Sergei Golubchik <sergii@pisem.net> 2013-01-29 15:10:47 +0100
commit: 0af4b6c6ee2b8a61823478c0a56ebdfa52cae3cc (patch)
tree: 7b24eb150b9cca718c88edaabbfc6c8bb16fd015 /sql/uniques.cc
parent: cf20de000bdff07a34a373079991d24837423896 (diff)
parent: 52fbe44fbbe60ecaba6453884ec1ad32755d7a04 (diff)
download: mariadb-git-0af4b6c6ee2b8a61823478c0a56ebdfa52cae3cc.tar.gz
1 files changed, 100 insertions, 57 deletions
diff --git a/sql/uniques.cc b/sql/uniques.cc
index 22f098f8f9a..9fa06311ece 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -77,7 +77,10 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq
 Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
 	       uint size_arg, ulonglong max_in_memory_size_arg,
                uint min_dupl_count_arg)
-  :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0)
+  :max_in_memory_size(max_in_memory_size_arg),
+   record_pointers(NULL),
+   size(size_arg),
+   elements(0)
 {
   min_dupl_count= min_dupl_count_arg;
   full_size= size;
@@ -582,6 +585,7 @@ end:
   SYNOPSIS
     Unique:walk()
   All params are 'IN':
+    table   parameter for the call of the merge method
     action  function-visitor, typed in include/my_tree.h
             function is called for each unique element
     arg     argument for visitor, which is passed to it on each call
@@ -590,71 +594,67 @@ end:
     <> 0 error
  */
 
-bool Unique::walk(tree_walk_action action, void *walk_action_arg)
+bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
 {
-  int res;
+  int res= 0;
   uchar *merge_buffer;
 
   if (elements == 0)                       /* the whole tree is in memory */
     return tree_walk(&tree, action, walk_action_arg, left_root_right);
 
+  table->sort.found_records=elements+tree.elements_in_tree;
   /* flush current tree to the file to have some memory for merge buffer */
   if (flush())
     return 1;
   if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0))
     return 1;
-  if (!(merge_buffer= (uchar *) my_malloc((ulong) max_in_memory_size,
-                                          MYF(MY_THREAD_SPECIFIC))))
+  ulong buff_sz= (max_in_memory_size / full_size + 1) * full_size;
+  if (!(merge_buffer= (uchar *) my_malloc(buff_sz, MYF(MY_THREAD_SPECIFIC))))
     return 1;
-  res= merge_walk(merge_buffer, (ulong) max_in_memory_size, size,
-                  (BUFFPEK *) file_ptrs.buffer,
-                  (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
-                  action, walk_action_arg,
-                  tree.compare, tree.custom_arg, &file);
+  if (buff_sz < full_size * (file_ptrs.elements + 1UL))
+    res= merge(table, merge_buffer, buff_sz >= full_size * MERGEBUFF2) ;
+  
+  if (!res)
+  {  
+    res= merge_walk(merge_buffer, (ulong) max_in_memory_size, full_size,
+                    (BUFFPEK *) file_ptrs.buffer,
+                    (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
+                    action, walk_action_arg,
+                    tree.compare, tree.custom_arg, &file);
+  }
   my_free(merge_buffer);
   return res;
 }
 
+
 /*
-  Modify the TABLE element so that when one calls init_records()
-  the rows will be read in priority order.
-*/
+  DESCRIPTION
+    Perform multi-pass sort merge of the elements accessed through table->sort,
+    using the buffer buff as the merge buffer. The last pass is not performed
+    if without_last_merge is TRUE.
+  SYNOPSIS
+    Unique:merge()
+  All params are 'IN':
+    table               the parameter to access sort context
+    buff                merge buffer
+    without_last_merge  TRUE <=> do not perform the last merge
+  RETURN VALUE
+    0    OK
+    <> 0 error
+ */
 
-bool Unique::get(TABLE *table)
+bool Unique::merge(TABLE *table, uchar *buff, bool without_last_merge)
 {
-  table->sort.found_records=elements+tree.elements_in_tree;
-  if (my_b_tell(&file) == 0)
-  {
-    /* Whole tree is in memory;  Don't use disk if you don't need to */
-    if ((record_pointers=table->sort.record_pointers= (uchar*)
-	 my_malloc(size * tree.elements_in_tree, MYF(MY_THREAD_SPECIFIC))))
-    {
-      tree_walk_action action= min_dupl_count ?
-		         (tree_walk_action) unique_intersect_write_to_ptrs :
-		         (tree_walk_action) unique_write_to_ptrs;
-      filtered_out_elems= 0;
-      (void) tree_walk(&tree, action,
-		       this, left_root_right);
-      table->sort.found_records-= filtered_out_elems;
-      return 0;
-    }
-  }
-  /* Not enough memory; Save the result to file && free memory used by tree */
-  if (flush())
-    return 1;
-
-  IO_CACHE *outfile=table->sort.io_cache;
+  IO_CACHE *outfile= table->sort.io_cache;
   BUFFPEK *file_ptr= (BUFFPEK*) file_ptrs.buffer;
   uint maxbuffer= file_ptrs.elements - 1;
-  uchar *sort_buffer;
   my_off_t save_pos;
-  bool error=1;
-
-      /* Open cached file if it isn't open */
-  outfile=table->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE),
-                                                     MYF(MY_ZEROFILL |
-                                                         MY_THREAD_SPECIFIC));
+  bool error= 1;
 
+  /* Open cached file if it isn't open */
+  if (!outfile)
+    outfile= table->sort.io_cache= (IO_CACHE*) my_malloc(sizeof(IO_CACHE),
+                                          MYF(MY_THREAD_SPECIFIC|MY_ZEROFILL));
   if (!outfile ||
       (! my_b_inited(outfile) &&
        open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
@@ -665,20 +665,16 @@ bool Unique::get(TABLE *table)
   Sort_param sort_param; 
   bzero((char*) &sort_param,sizeof(sort_param));
   sort_param.max_rows= elements;
-  sort_param.sort_form=table;
+  sort_param.sort_form= table;
   sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
    full_size;
   sort_param.min_dupl_count= min_dupl_count;
   sort_param.res_length= 0;
   sort_param.max_keys_per_buffer= 
     (uint) (max_in_memory_size / sort_param.sort_length);
-  sort_param.not_killable=1;
+  sort_param.not_killable= 1;
 
-  if (!(sort_buffer=(uchar*) my_malloc((sort_param.max_keys_per_buffer+1) *
-				       sort_param.sort_length,
-				       MYF(MY_THREAD_SPECIFIC))))
-    return 1;
-  sort_param.unique_buff= sort_buffer+(sort_param.max_keys_per_buffer *
+  sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer *
 				       sort_param.sort_length);
 
   sort_param.compare= (qsort2_cmp) buffpek_compare;
@@ -686,27 +682,74 @@ bool Unique::get(TABLE *table)
   sort_param.cmp_context.key_compare_arg= tree.custom_arg;
 
   /* Merge the buffers to one file, removing duplicates */
-  if (merge_many_buff(&sort_param,sort_buffer,file_ptr,&maxbuffer,&file))
+  if (merge_many_buff(&sort_param,buff,file_ptr,&maxbuffer,&file))
     goto err;
   if (flush_io_cache(&file) ||
       reinit_io_cache(&file,READ_CACHE,0L,0,0))
     goto err;
   sort_param.res_length= sort_param.rec_length-
                          (min_dupl_count ? sizeof(min_dupl_count) : 0);
-  if (merge_index(&sort_param, sort_buffer, file_ptr, maxbuffer, &file, outfile))
+  if (without_last_merge)
+  {
+    file_ptrs.elements= maxbuffer+1;
+    return 0;
+  }
+  if (merge_index(&sort_param, buff, file_ptr, maxbuffer, &file, outfile))
     goto err;
-  error=0;
+  error= 0;
 err:
-  my_free(sort_buffer);
   if (flush_io_cache(outfile))
-    error=1;
+    error= 1;
 
   /* Setup io_cache for reading */
-  save_pos=outfile->pos_in_file;
+  save_pos= outfile->pos_in_file;
   if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
-    error=1;
+    error= 1;
   outfile->end_of_file=save_pos;
   return error;
 }
 
 
+/*
+  Modify the TABLE element so that when one calls init_records()
+  the rows will be read in priority order.
+*/
+
+bool Unique::get(TABLE *table)
+{
+  bool rc= 1;
+  uchar *sort_buffer= NULL;
+  table->sort.found_records= elements+tree.elements_in_tree;
+
+  if (my_b_tell(&file) == 0)
+  {
+    /* Whole tree is in memory;  Don't use disk if you don't need to */
+    if ((record_pointers=table->sort.record_pointers= (uchar*)
+	 my_malloc(size * tree.elements_in_tree, MYF(MY_THREAD_SPECIFIC))))
+    {
+      tree_walk_action action= min_dupl_count ?
+		         (tree_walk_action) unique_intersect_write_to_ptrs :
+		         (tree_walk_action) unique_write_to_ptrs;
+      filtered_out_elems= 0;
+      (void) tree_walk(&tree, action,
+		       this, left_root_right);
+      table->sort.found_records-= filtered_out_elems;
+      return 0;
+    }
+  }
+  /* Not enough memory; Save the result to file && free memory used by tree */
+  if (flush())
+    return 1;
+  
+  ulong buff_sz= (max_in_memory_size / full_size + 1) * full_size;
+  if (!(sort_buffer= (uchar*) my_malloc(buff_sz, MYF(MY_THREAD_SPECIFIC))))
+    return 1;
+
+  if (merge(table, sort_buffer, FALSE))
+    goto err;  
+  rc= 0;  
+
+err:  
+  my_free(sort_buffer);  
+  return rc;
+}
author	Sergei Golubchik <sergii@pisem.net>	2013-01-29 15:10:47 +0100
committer	Sergei Golubchik <sergii@pisem.net>	2013-01-29 15:10:47 +0100
commit	0af4b6c6ee2b8a61823478c0a56ebdfa52cae3cc (patch)
tree	7b24eb150b9cca718c88edaabbfc6c8bb16fd015 /sql/uniques.cc
parent	cf20de000bdff07a34a373079991d24837423896 (diff)
parent	52fbe44fbbe60ecaba6453884ec1ad32755d7a04 (diff)
download	mariadb-git-0af4b6c6ee2b8a61823478c0a56ebdfa52cae3cc.tar.gz