author:    Monty <monty@mariadb.org>    2016-03-22 21:51:59 +0200
committer: Monty <monty@mariadb.org>    2016-03-22 23:44:52 +0200
commit:    260dd476b057b759af7973550b560dc2f56e18fd (patch)
tree:      ef55ede44a6f47171ca9f2a2121f377a6ea15832
parent:    d0a47704c5d4360a3076c0e6b8abec186fac1f39 (diff)
download:  mariadb-git-260dd476b057b759af7973550b560dc2f56e18fd.tar.gz
Removed TABLE->sort to make it possible to have multiple active calls to
filesort and init_read_record() for the same table.
This will simplify code for WINDOW FUNCTIONS (MDEV-6115)
- Filesort_info renamed to SORT_INFO and moved to filesort.h
- filesort now returns SORT_INFO
- init_read_record() now takes a SORT_INFO parameter.
- The Unique class declaration is moved to uniques.h
- subselect caching of buffers is now more explicit than before
- filesort_buffer is now reusable even if rec_length has changed.
- filesort_free_buffers() and free_io_cache() calls are removed
- Remove one malloc() when using get_addon_fields()
Other things:
- Added --debug-assert-on-not-freed-memory option to make it easier to
debug some not-freed-memory issues.
40 files changed, 662 insertions, 581 deletions
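
To make the API change concrete, here is a minimal sketch of the new calling convention, condensed from the sql_delete.cc hunk further below (error handling and the surrounding DELETE logic are omitted; `sortorder`, `length`, `fs_tracker` and the `got_error` label are taken from that hunk):

```cpp
/*
  Sketch of the new calling convention (condensed from the sql_delete.cc
  hunk in this commit). filesort() no longer fills in table->sort; it
  returns a heap-allocated SORT_INFO that the caller owns, passes
  explicitly to init_read_record(), and finally deletes.
*/
SORT_INFO  *file_sort;
READ_RECORD info;

if (!(file_sort= filesort(thd, table, sortorder, length,
                          select, HA_POS_ERROR,
                          /* sort_positions */ true,
                          fs_tracker)))
  goto got_error;                        /* NULL now signals an error */

thd->inc_examined_row_count(file_sort->examined_rows);

/* The sort result is handed over explicitly instead of via TABLE::sort */
if (init_read_record(&info, thd, table, select, file_sort, 1, 1, FALSE))
  goto got_error;

/* ... iterate rows via info.read_record(&info) ... */

end_read_record(&info);
delete file_sort;                        /* caller frees the SORT_INFO */
```

Because the sort state now lives in a caller-owned object rather than in TABLE, several independent filesort()/init_read_record() passes over the same table can coexist, which is what the window-function work (MDEV-6115) needs.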
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index da1d54ef81d..6136c39fc9c 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -112,7 +112,8 @@ SET (SQL_SOURCE sql_statistics.cc sql_string.cc sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc sql_union.cc sql_update.cc sql_view.cc strfunc.cc table.cc thr_malloc.cc - sql_time.cc tztime.cc uniques.cc unireg.cc item_xmlfunc.cc + sql_time.cc tztime.cc unireg.cc item_xmlfunc.cc + uniques.cc uniques.h rpl_tblmap.cc sql_binlog.cc event_scheduler.cc event_data_objects.cc event_queue.cc event_db_repository.cc sql_tablespace.cc events.cc ../sql-common/my_user.c diff --git a/sql/event_db_repository.cc b/sql/event_db_repository.cc index e39f727800a..e7554da1969 100644 --- a/sql/event_db_repository.cc +++ b/sql/event_db_repository.cc @@ -499,7 +499,8 @@ Event_db_repository::table_scan_all_for_i_s(THD *thd, TABLE *schema_table, READ_RECORD read_record_info; DBUG_ENTER("Event_db_repository::table_scan_all_for_i_s"); - if (init_read_record(&read_record_info, thd, event_table, NULL, 1, 0, FALSE)) + if (init_read_record(&read_record_info, thd, event_table, NULL, NULL, 1, 0, + FALSE)) DBUG_RETURN(TRUE); /* @@ -1015,7 +1016,7 @@ Event_db_repository::drop_schema_events(THD *thd, LEX_STRING schema) DBUG_VOID_RETURN; /* only enabled events are in memory, so we go now and delete the rest */ - if (init_read_record(&read_record_info, thd, table, NULL, 1, 0, FALSE)) + if (init_read_record(&read_record_info, thd, table, NULL, NULL, 1, 0, FALSE)) goto end; while (!ret && !(read_record_info.read_record(&read_record_info)) ) diff --git a/sql/events.cc b/sql/events.cc index b80ec993ac4..df487e88f67 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -1134,7 +1134,7 @@ Events::load_events_from_db(THD *thd) DBUG_RETURN(TRUE); } - if (init_read_record(&read_record_info, thd, table, NULL, 0, 1, FALSE)) + if (init_read_record(&read_record_info, thd, table, NULL, NULL, 0, 1, FALSE)) { close_thread_tables(thd); DBUG_RETURN(TRUE); diff --git a/sql/filesort.cc b/sql/filesort.cc index 9289d712cbc..54a79421d2e 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -50,26 +50,27 @@ if (my_b_write((file),(uchar*) (from),param->ref_length)) \ static uchar *read_buffpek_from_file(IO_CACHE *buffer_file, uint count, uchar *buf); static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, - Filesort_info *fs_info, + SORT_INFO *fs_info, IO_CACHE *buffer_file, IO_CACHE *tempfile, Bounded_queue<uchar, uchar> *pq, ha_rows *found_rows); -static bool write_keys(Sort_param *param, Filesort_info *fs_info, +static bool write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile); static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos); static void register_used_fields(Sort_param *param); static bool save_index(Sort_param *param, uint count, - Filesort_info *table_sort); + SORT_INFO *table_sort); static uint suffix_length(ulong string_length); static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, bool *multi_byte_charset); static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data, Field **ptabfield, - uint sortlength, uint *plength); + uint sortlength, + LEX_STRING *addon_buf); static void unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff, uchar *buff_end); -static bool check_if_pq_applicable(Sort_param *param, Filesort_info *info, +static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info, TABLE *table, ha_rows records, ulong 
memory_available); @@ -78,6 +79,8 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table, ulong max_length_for_sort_data, ha_rows maxrows, bool sort_positions) { + DBUG_ASSERT(addon_field == 0 && addon_buf.length == 0); + sort_length= sortlen; ref_length= table->file->ref_length; if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) && @@ -85,13 +88,13 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table, { /* Get the descriptors of all fields whose values are appended - to sorted fields and get its total length in addon_length. + to sorted fields and get its total length in addon_buf.length */ addon_field= get_addon_fields(max_length_for_sort_data, - table->field, sort_length, &addon_length); + table->field, sort_length, &addon_buf); } if (addon_field) - res_length= addon_length; + res_length= addon_buf.length; else { res_length= ref_length; @@ -101,7 +104,7 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table, */ sort_length+= ref_length; } - rec_length= sort_length + addon_length; + rec_length= sort_length + addon_buf.length; max_rows= maxrows; } @@ -115,8 +118,9 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table, Before calling filesort, one must have done table->file->info(HA_STATUS_VARIABLE) - The result set is stored in table->io_cache or - table->record_pointers. + The result set is stored in + filesort_info->io_cache or + filesort_info->record_pointers. @param thd Current thread @param table Table to sort @@ -124,28 +128,24 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table, @param s_length Number of elements in sortorder @param select Condition to apply to the rows @param max_rows Return only this many rows - @param sort_positions Set to TRUE if we want to force sorting by position + @param sort_positions Set to TRUE if we want to force sorting by + position (Needed by UPDATE/INSERT or ALTER TABLE or when rowids are required by executor) - @param[out] examined_rows Store number of examined rows here - @param[out] found_rows Store the number of found rows here - @note If we sort by position (like if sort_positions is 1) filesort() will call table->prepare_for_position(). @retval - HA_POS_ERROR Error - @retval - \# Number of rows + 0 Error + # SORT_INFO */ -ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, - SQL_SELECT *select, ha_rows max_rows, - bool sort_positions, - ha_rows *examined_rows, - ha_rows *found_rows, - Filesort_tracker* tracker) +SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, + uint s_length, + SQL_SELECT *select, ha_rows max_rows, + bool sort_positions, + Filesort_tracker* tracker) { int error; size_t memory_available= thd->variables.sortbuff_size; @@ -162,33 +162,37 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, #ifdef SKIP_DBUG_IN_FILESORT DBUG_PUSH(""); /* No DBUG here */ #endif - Filesort_info table_sort= table->sort; + SORT_INFO *sort; TABLE_LIST *tab= table->pos_in_table_list; Item_subselect *subselect= tab ? 
tab->containing_subselect() : 0; - MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str); DEBUG_SYNC(thd, "filesort_start"); + if (!(sort= new SORT_INFO)) + return 0; + + if (subselect && subselect->filesort_buffer.is_allocated()) + { + /* Reuse cache from last call */ + sort->filesort_buffer= subselect->filesort_buffer; + sort->buffpek= subselect->sortbuffer; + subselect->filesort_buffer.reset(); + subselect->sortbuffer.str=0; + } + + outfile= &sort->io_cache; + /* Release InnoDB's adaptive hash index latch (if holding) before running a sort. */ ha_release_temporary_latches(thd); - /* - Don't use table->sort in filesort as it is also used by - QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end - when index_merge select has finished with it. - */ - table->sort.io_cache= NULL; - DBUG_ASSERT(table_sort.record_pointers == NULL); - - outfile= table_sort.io_cache; my_b_clear(&tempfile); my_b_clear(&buffpek_pointers); buffpek=0; error= 1; - *found_rows= HA_POS_ERROR; + sort->found_rows= HA_POS_ERROR; param.init_for_filesort(sortlength(thd, sortorder, s_length, &multi_byte_charset), @@ -196,14 +200,12 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, thd->variables.max_length_for_sort_data, max_rows, sort_positions); - table_sort.addon_buf= 0; - table_sort.addon_length= param.addon_length; - table_sort.addon_field= param.addon_field; - table_sort.unpack= unpack_addon_fields; - if (param.addon_field && - !(table_sort.addon_buf= - (uchar *) my_malloc(param.addon_length, MYF(MY_WME | - MY_THREAD_SPECIFIC)))) + sort->addon_buf= param.addon_buf; + sort->addon_field= param.addon_field; + sort->unpack= unpack_addon_fields; + if (multi_byte_charset && + !(param.tmp_buffer= (char*) my_malloc(param.sort_length, + MYF(MY_WME | MY_THREAD_SPECIFIC)))) goto err; if (select && select->quick) @@ -216,12 +218,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, // If number of rows is not known, use as much of sort buffer as possible. num_rows= table->file->estimate_rows_upper_bound(); - if (multi_byte_charset && - !(param.tmp_buffer= (char*) my_malloc(param.sort_length, - MYF(MY_WME | MY_THREAD_SPECIFIC)))) - goto err; - - if (check_if_pq_applicable(¶m, &table_sort, + if (check_if_pq_applicable(¶m, sort, table, num_rows, memory_available)) { DBUG_PRINT("info", ("filesort PQ is applicable")); @@ -233,45 +230,31 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, true, // max_at_top NULL, // compare_function compare_length, - &make_sortkey, ¶m, table_sort.get_sort_keys())) + &make_sortkey, ¶m, sort->get_sort_keys())) { /* If we fail to init pq, we have to give up: out of memory means my_malloc() will call my_error(). */ DBUG_PRINT("info", ("failed to allocate PQ")); - table_sort.free_sort_buffer(); DBUG_ASSERT(thd->is_error()); goto err; } // For PQ queries (with limit) we initialize all pointers. 
- table_sort.init_record_pointers(); + sort->init_record_pointers(); } else { DBUG_PRINT("info", ("filesort PQ is not applicable")); - size_t min_sort_memory= MY_MAX(MIN_SORT_MEMORY, param.sort_length*MERGEBUFF2); + size_t min_sort_memory= MY_MAX(MIN_SORT_MEMORY, + param.sort_length*MERGEBUFF2); set_if_bigger(min_sort_memory, sizeof(BUFFPEK*)*MERGEBUFF2); while (memory_available >= min_sort_memory) { ulonglong keys= memory_available / (param.rec_length + sizeof(char*)); param.max_keys_per_buffer= (uint) MY_MIN(num_rows, keys); - if (table_sort.get_sort_keys()) - { - // If we have already allocated a buffer, it better have same size! - if (!table_sort.check_sort_buffer_properties(param.max_keys_per_buffer, - param.rec_length)) - { - /* - table->sort will still have a pointer to the same buffer, - but that will be overwritten by the assignment below. - */ - table_sort.free_sort_buffer(); - } - } - table_sort.alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length); - if (table_sort.get_sort_keys()) + if (sort->alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length)) break; size_t old_memory_available= memory_available; memory_available= memory_available/4*3; @@ -284,7 +267,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR)); goto err; } - tracker->report_sort_buffer_size(table_sort.sort_buffer_size()); + tracker->report_sort_buffer_size(sort->sort_buffer_size()); } if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX, @@ -294,21 +277,21 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, param.sort_form= table; param.end=(param.local_sortorder=sortorder)+s_length; num_rows= find_all_keys(thd, ¶m, select, - &table_sort, + sort, &buffpek_pointers, &tempfile, pq.is_initialized() ? &pq : NULL, - found_rows); + &sort->found_rows); if (num_rows == HA_POS_ERROR) goto err; maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek)); tracker->report_merge_passes_at_start(thd->query_plan_fsort_passes); - tracker->report_row_numbers(param.examined_rows, *found_rows, num_rows); + tracker->report_row_numbers(param.examined_rows, sort->found_rows, num_rows); if (maxbuffer == 0) // The whole set is in memory { - if (save_index(¶m, (uint) num_rows, &table_sort)) + if (save_index(¶m, (uint) num_rows, sort)) goto err; } else @@ -316,17 +299,17 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, /* filesort cannot handle zero-length records during merge. */ DBUG_ASSERT(param.sort_length != 0); - if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer) + if (sort->buffpek.str && sort->buffpek.length < maxbuffer) { - my_free(table_sort.buffpek); - table_sort.buffpek= 0; + my_free(sort->buffpek.str); + sort->buffpek.str= 0; } - if (!(table_sort.buffpek= - (uchar *) read_buffpek_from_file(&buffpek_pointers, maxbuffer, - table_sort.buffpek))) + if (!(sort->buffpek.str= + (char *) read_buffpek_from_file(&buffpek_pointers, maxbuffer, + (uchar*) sort->buffpek.str))) goto err; - buffpek= (BUFFPEK *) table_sort.buffpek; - table_sort.buffpek_len= maxbuffer; + sort->buffpek.length= maxbuffer; + buffpek= (BUFFPEK *) sort->buffpek.str; close_cached_file(&buffpek_pointers); /* Open cached file if it isn't open */ if (! 
my_b_inited(outfile) && @@ -345,7 +328,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, param.rec_length - 1); maxbuffer--; // Offset from 0 if (merge_many_buff(¶m, - (uchar*) table_sort.get_sort_keys(), + (uchar*) sort->get_sort_keys(), buffpek,&maxbuffer, &tempfile)) goto err; @@ -353,7 +336,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, reinit_io_cache(&tempfile,READ_CACHE,0L,0,0)) goto err; if (merge_index(¶m, - (uchar*) table_sort.get_sort_keys(), + (uchar*) sort->get_sort_keys(), buffpek, maxbuffer, &tempfile, @@ -372,11 +355,18 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, my_free(param.tmp_buffer); if (!subselect || !subselect->is_uncacheable()) { - table_sort.free_sort_buffer(); - my_free(buffpek); - table_sort.buffpek= 0; - table_sort.buffpek_len= 0; + sort->free_sort_buffer(); + my_free(sort->buffpek.str); } + else + { + /* Remember sort buffers for next subquery call */ + subselect->filesort_buffer= sort->filesort_buffer; + subselect->sortbuffer= sort->buffpek; + sort->filesort_buffer.reset(); // Don't free this + } + sort->buffpek.str= 0; + close_cached_file(&tempfile); close_cached_file(&buffpek_pointers); if (my_b_inited(outfile)) @@ -397,13 +387,6 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, int kill_errno= thd->killed_errno(); DBUG_ASSERT(thd->is_error() || kill_errno || thd->killed == ABORT_QUERY); - /* - We replace the table->sort at the end. - Hence calling free_io_cache to make sure table->sort.io_cache - used for QUICK_INDEX_MERGE_SELECT is free. - */ - free_io_cache(table); - my_printf_error(ER_FILSORT_ABORT, "%s: %s", MYF(0), @@ -424,50 +407,26 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, } else thd->inc_status_sort_rows(num_rows); - *examined_rows= param.examined_rows; + + sort->examined_rows= param.examined_rows; + sort->return_rows= num_rows; #ifdef SKIP_DBUG_IN_FILESORT DBUG_POP(); /* Ok to DBUG */ #endif - /* table->sort.io_cache should be free by this time */ - DBUG_ASSERT(NULL == table->sort.io_cache); - - // Assign the copy back! - table->sort= table_sort; - DBUG_PRINT("exit", - ("num_rows: %ld examined_rows: %ld found_rows: %ld", - (long) num_rows, (long) *examined_rows, (long) *found_rows)); + ("num_rows: %lld examined_rows: %lld found_rows: %lld", + (longlong) sort->return_rows, (longlong) sort->examined_rows, + (longlong) sort->found_rows)); MYSQL_FILESORT_DONE(error, num_rows); - DBUG_RETURN(error ? HA_POS_ERROR : num_rows); -} /* filesort */ - - -void filesort_free_buffers(TABLE *table, bool full) -{ - DBUG_ENTER("filesort_free_buffers"); - - my_free(table->sort.record_pointers); - table->sort.record_pointers= NULL; - - if (unlikely(full)) - { - table->sort.free_sort_buffer(); - my_free(table->sort.buffpek); - table->sort.buffpek= NULL; - table->sort.buffpek_len= 0; - } - /* addon_buf is only allocated if addon_field is set */ - if (unlikely(table->sort.addon_field)) + if (error) { - my_free(table->sort.addon_field); - my_free(table->sort.addon_buf); - table->sort.addon_buf= NULL; - table->sort.addon_field= NULL; + delete sort; + sort= 0; } - DBUG_VOID_RETURN; -} + DBUG_RETURN(sort); +} /* filesort */ /** Read 'count' number of buffer pointers into memory. 
*/ @@ -672,7 +631,7 @@ static void dbug_print_record(TABLE *table, bool print_rowid) */ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, - Filesort_info *fs_info, + SORT_INFO *fs_info, IO_CACHE *buffpek_pointers, IO_CACHE *tempfile, Bounded_queue<uchar, uchar> *pq, @@ -877,7 +836,7 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, const ha_rows retval= my_b_inited(tempfile) ? (ha_rows) (my_b_tell(tempfile)/param->rec_length) : idx; - DBUG_PRINT("info", ("find_all_keys return %u", (uint) retval)); + DBUG_PRINT("info", ("find_all_keys return %llu", (ulonglong) retval)); DBUG_RETURN(retval); } /* find_all_keys */ @@ -905,7 +864,7 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, */ static bool -write_keys(Sort_param *param, Filesort_info *fs_info, uint count, +write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, IO_CACHE *buffpek_pointers, IO_CACHE *tempfile) { size_t rec_length; @@ -1274,11 +1233,13 @@ static void register_used_fields(Sort_param *param) } -static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort) +static bool save_index(Sort_param *param, uint count, + SORT_INFO *table_sort) { uint offset,res_length; uchar *to; DBUG_ENTER("save_index"); + DBUG_ASSERT(table_sort->record_pointers == 0); table_sort->sort_buffer(param, count); res_length= param->res_length; @@ -1327,7 +1288,7 @@ static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort) */ bool check_if_pq_applicable(Sort_param *param, - Filesort_info *filesort_info, + SORT_INFO *filesort_info, TABLE *table, ha_rows num_rows, ulong memory_available) { @@ -1361,9 +1322,8 @@ bool check_if_pq_applicable(Sort_param *param, // The whole source set fits into memory. if (param->max_rows < num_rows/PQ_slowness ) { - filesort_info->alloc_sort_buffer(param->max_keys_per_buffer, - param->rec_length); - DBUG_RETURN(filesort_info->get_sort_keys() != NULL); + DBUG_RETURN(filesort_info->alloc_sort_buffer(param->max_keys_per_buffer, + param->rec_length) != NULL); } else { @@ -1375,9 +1335,8 @@ bool check_if_pq_applicable(Sort_param *param, // Do we have space for LIMIT rows in memory? if (param->max_keys_per_buffer < num_available_keys) { - filesort_info->alloc_sort_buffer(param->max_keys_per_buffer, - param->rec_length); - DBUG_RETURN(filesort_info->get_sort_keys() != NULL); + DBUG_RETURN(filesort_info->alloc_sort_buffer(param->max_keys_per_buffer, + param->rec_length) != NULL); } // Try to strip off addon fields. @@ -1413,17 +1372,14 @@ bool check_if_pq_applicable(Sort_param *param, if (sort_merge_cost < pq_cost) DBUG_RETURN(false); - filesort_info->alloc_sort_buffer(param->max_keys_per_buffer, - param->sort_length + param->ref_length); - if (filesort_info->get_sort_keys()) + if (filesort_info->alloc_sort_buffer(param->max_keys_per_buffer, + param->sort_length + + param->ref_length)) { - // Make attached data to be references instead of fields. - my_free(filesort_info->addon_buf); + /* Make attached data to be references instead of fields. 
*/ my_free(filesort_info->addon_field); - filesort_info->addon_buf= NULL; filesort_info->addon_field= NULL; param->addon_field= NULL; - param->addon_length= 0; param->res_length= param->ref_length; param->sort_length+= param->ref_length; @@ -1993,7 +1949,7 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, @param thd Current thread @param ptabfield Array of references to the table fields @param sortlength Total length of sorted fields - @param[out] plength Total length of appended fields + @param [out] addon_buf Buffer to us for appended fields @note The null bits for the appended values are supposed to be put together @@ -2007,7 +1963,7 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length, static SORT_ADDON_FIELD * get_addon_fields(ulong max_length_for_sort_data, - Field **ptabfield, uint sortlength, uint *plength) + Field **ptabfield, uint sortlength, LEX_STRING *addon_buf) { Field **pfield; Field *field; @@ -2016,6 +1972,7 @@ get_addon_fields(ulong max_length_for_sort_data, uint fields= 0; uint null_fields= 0; MY_BITMAP *read_set= (*ptabfield)->table->read_set; + DBUG_ENTER("get_addon_fields"); /* If there is a reference to a field in the query add it @@ -2027,31 +1984,33 @@ get_addon_fields(ulong max_length_for_sort_data, the values directly from sorted fields. But beware the case when item->cmp_type() != item->result_type() */ - *plength= 0; + addon_buf->str= 0; + addon_buf->length= 0; for (pfield= ptabfield; (field= *pfield) ; pfield++) { if (!bitmap_is_set(read_set, field->field_index)) continue; if (field->flags & BLOB_FLAG) - return 0; + DBUG_RETURN(0); length+= field->max_packed_col_length(field->pack_length()); if (field->maybe_null()) null_fields++; fields++; } if (!fields) - return 0; + DBUG_RETURN(0); length+= (null_fields+7)/8; if (length+sortlength > max_length_for_sort_data || - !(addonf= (SORT_ADDON_FIELD *) my_malloc(sizeof(SORT_ADDON_FIELD)* - (fields+1), - MYF(MY_WME | - MY_THREAD_SPECIFIC)))) - return 0; + !my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC), + &addonf, sizeof(SORT_ADDON_FIELD) * (fields+1), + &addon_buf->str, length, + NullS)) - *plength= length; + DBUG_RETURN(0); + + addon_buf->length= length; length= (null_fields+7)/8; null_fields= 0; for (pfield= ptabfield; (field= *pfield) ; pfield++) @@ -2078,7 +2037,7 @@ get_addon_fields(ulong max_length_for_sort_data, addonf->field= 0; // Put end marker DBUG_PRINT("info",("addon_length: %d",length)); - return (addonf-fields); + DBUG_RETURN(addonf-fields); } @@ -2164,3 +2123,13 @@ void change_double_for_sort(double nr,uchar *to) } } +/** + Free SORT_INFO +*/ + +SORT_INFO::~SORT_INFO() +{ + DBUG_ENTER("~SORT_INFO::SORT_INFO()"); + free_data(); + DBUG_VOID_RETURN; +} diff --git a/sql/filesort.h b/sql/filesort.h index 22d7f987de2..454c745b5c0 100644 --- a/sql/filesort.h +++ b/sql/filesort.h @@ -16,10 +16,8 @@ #ifndef FILESORT_INCLUDED #define FILESORT_INCLUDED -class SQL_SELECT; - -#include "my_global.h" /* uint, uchar */ #include "my_base.h" /* ha_rows */ +#include "filesort_utils.h" class SQL_SELECT; class THD; @@ -27,12 +25,92 @@ struct TABLE; struct SORT_FIELD; class Filesort_tracker; -ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, - uint s_length, SQL_SELECT *select, - ha_rows max_rows, bool sort_positions, - ha_rows *examined_rows, ha_rows *found_rows, - Filesort_tracker* tracker); -void filesort_free_buffers(TABLE *table, bool full); +class SORT_INFO +{ + /// Buffer for sorting keys. 
+ Filesort_buffer filesort_buffer; + +public: + SORT_INFO() + :addon_field(0), record_pointers(0) + { + buffpek.str= 0; + my_b_clear(&io_cache); + } + + ~SORT_INFO(); + + void free_data() + { + close_cached_file(&io_cache); + my_free(record_pointers); + my_free(buffpek.str); + my_free(addon_field); + } + + void reset() + { + free_data(); + record_pointers= 0; + buffpek.str= 0; + addon_field= 0; + } + + + IO_CACHE io_cache; /* If sorted through filesort */ + LEX_STRING buffpek; /* Buffer for buffpek structures */ + LEX_STRING addon_buf; /* Pointer to a buffer if sorted with fields */ + struct st_sort_addon_field *addon_field; /* Pointer to the fields info */ + /* To unpack back */ + void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); + uchar *record_pointers; /* If sorted in memory */ + /* + How many rows in final result. + Also how many rows in record_pointers, if used + */ + ha_rows return_rows; + ha_rows examined_rows; /* How many rows read */ + ha_rows found_rows; /* How many rows was accepted */ + + /** Sort filesort_buffer */ + void sort_buffer(Sort_param *param, uint count) + { filesort_buffer.sort_buffer(param, count); } + + /** + Accessors for Filesort_buffer (which @c). + */ + uchar *get_record_buffer(uint idx) + { return filesort_buffer.get_record_buffer(idx); } + + uchar **get_sort_keys() + { return filesort_buffer.get_sort_keys(); } + + uchar **alloc_sort_buffer(uint num_records, uint record_length) + { return filesort_buffer.alloc_sort_buffer(num_records, record_length); } + + void free_sort_buffer() + { filesort_buffer.free_sort_buffer(); } + + void init_record_pointers() + { filesort_buffer.init_record_pointers(); } + + size_t sort_buffer_size() const + { return filesort_buffer.sort_buffer_size(); } + + friend SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, + uint s_length, + SQL_SELECT *select, ha_rows max_rows, + bool sort_positions, + Filesort_tracker* tracker); +}; + + +SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, + uint s_length, + SQL_SELECT *select, ha_rows max_rows, + bool sort_positions, + Filesort_tracker* tracker); + void change_double_for_sort(double nr,uchar *to); #endif /* FILESORT_INCLUDED */ diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc index 1cef30b6a56..34110dcfc1f 100644 --- a/sql/filesort_utils.cc +++ b/sql/filesort_utils.cc @@ -85,31 +85,66 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows, return total_cost; } -uchar **Filesort_buffer::alloc_sort_buffer(uint num_records, uint record_length) -{ - ulong sort_buff_sz; +/* + alloc_sort_buffer() - DBUG_ENTER("alloc_sort_buffer"); + Allocate buffer for sorting keys. + Try to reuse old buffer if possible. 
+ @return + 0 Error + # Pointer to allocated buffer +*/ + +uchar **Filesort_buffer::alloc_sort_buffer(uint num_records, + uint record_length) +{ + size_t buff_size; + uchar **sort_keys, **start_of_data; + DBUG_ENTER("alloc_sort_buffer"); DBUG_EXECUTE_IF("alloc_sort_buffer_fail", DBUG_SET("+d,simulate_out_of_memory");); - if (m_idx_array.is_null()) + buff_size= num_records * (record_length + sizeof(uchar*)); + set_if_bigger(buff_size, record_length * MERGEBUFF2); + + if (!m_idx_array.is_null()) { - sort_buff_sz= num_records * (record_length + sizeof(uchar*)); - set_if_bigger(sort_buff_sz, record_length * MERGEBUFF2); - uchar **sort_keys= - (uchar**) my_malloc(sort_buff_sz, MYF(MY_THREAD_SPECIFIC)); - m_idx_array= Idx_array(sort_keys, num_records); - m_record_length= record_length; - uchar **start_of_data= m_idx_array.array() + m_idx_array.size(); - m_start_of_data= reinterpret_cast<uchar*>(start_of_data); + /* + Reuse old buffer if exists and is large enough + Note that we don't make the buffer smaller, as we want to be + prepared for next subquery iteration. + */ + + sort_keys= m_idx_array.array(); + if (buff_size > allocated_size) + { + /* + Better to free and alloc than realloc as we don't have to remember + the old values + */ + my_free(sort_keys); + if (!(sort_keys= (uchar**) my_malloc(buff_size, + MYF(MY_THREAD_SPECIFIC)))) + { + reset(); + DBUG_RETURN(0); + } + allocated_size= buff_size; + } } else { - DBUG_ASSERT(num_records == m_idx_array.size()); - DBUG_ASSERT(record_length == m_record_length); + if (!(sort_keys= (uchar**) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC)))) + DBUG_RETURN(0); + allocated_size= buff_size; } + + m_idx_array= Idx_array(sort_keys, num_records); + m_record_length= record_length; + start_of_data= m_idx_array.array() + m_idx_array.size(); + m_start_of_data= reinterpret_cast<uchar*>(start_of_data); + DBUG_RETURN(m_idx_array.array()); } @@ -117,8 +152,7 @@ uchar **Filesort_buffer::alloc_sort_buffer(uint num_records, uint record_length) void Filesort_buffer::free_sort_buffer() { my_free(m_idx_array.array()); - m_idx_array= Idx_array(); - m_record_length= 0; + m_idx_array.reset(); m_start_of_data= NULL; } diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h index 00fa6f2566b..d537b602edf 100644 --- a/sql/filesort_utils.h +++ b/sql/filesort_utils.h @@ -60,9 +60,23 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows, class Filesort_buffer { public: - Filesort_buffer() : - m_idx_array(), m_record_length(0), m_start_of_data(NULL) + Filesort_buffer() + : m_idx_array(), m_start_of_data(NULL), allocated_size(0) {} + + ~Filesort_buffer() + { + my_free(m_idx_array.array()); + } + + bool is_allocated() + { + return m_idx_array.array() != 0; + } + void reset() + { + m_idx_array.reset(); + } /** Sort me... */ void sort_buffer(const Sort_param *param, uint count); @@ -84,20 +98,12 @@ public: /// Returns total size: pointer array + record buffers. size_t sort_buffer_size() const { - return m_idx_array.size() * (m_record_length + sizeof(uchar*)); + return allocated_size; } /// Allocates the buffer, but does *not* initialize pointers. uchar **alloc_sort_buffer(uint num_records, uint record_length); - - /// Check <num_records, record_length> for the buffer - bool check_sort_buffer_properties(uint num_records, uint record_length) - { - return (static_cast<uint>(m_idx_array.size()) == num_records && - m_record_length == record_length); - } - /// Frees the buffer. 
void free_sort_buffer(); @@ -115,15 +121,17 @@ public: m_idx_array= rhs.m_idx_array; m_record_length= rhs.m_record_length; m_start_of_data= rhs.m_start_of_data; + allocated_size= rhs.allocated_size; return *this; } private: typedef Bounds_checked_array<uchar*> Idx_array; - Idx_array m_idx_array; + Idx_array m_idx_array; /* Pointers to key data */ uint m_record_length; - uchar *m_start_of_data; + uchar *m_start_of_data; /* Start of key data */ + size_t allocated_size; }; #endif // FILESORT_UTILS_INCLUDED diff --git a/sql/handler.cc b/sql/handler.cc index 6919b252e14..863a6f15241 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -30,7 +30,7 @@ #include "sql_table.h" // build_table_filename #include "sql_parse.h" // check_stack_overrun #include "sql_acl.h" // SUPER_ACL -#include "sql_base.h" // free_io_cache +#include "sql_base.h" // TDC_element #include "discover.h" // extension_based_table_discovery, etc #include "log_event.h" // *_rows_log_event #include "create_options.h" @@ -5863,8 +5863,6 @@ int handler::ha_reset() DBUG_ASSERT(table->key_read == 0); /* ensure that ha_index_end / ha_rnd_end has been called */ DBUG_ASSERT(inited == NONE); - /* Free cache used by filesort */ - free_io_cache(table); /* reset the bitmaps to point to defaults */ table->default_column_bitmaps(); pushed_cond= NULL; diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index f22716d3d81..87c9fa696f7 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -58,6 +58,8 @@ Item_subselect::Item_subselect(THD *thd_arg): { DBUG_ENTER("Item_subselect::Item_subselect"); DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this)); + sortbuffer.str= 0; + #ifndef DBUG_OFF exec_counter= 0; #endif @@ -153,6 +155,9 @@ void Item_subselect::cleanup() if (engine) engine->cleanup(); reset(); + filesort_buffer.free_sort_buffer(); + my_free(sortbuffer.str); + value_assigned= 0; expr_cache= 0; forced_const= FALSE; diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 1b450044954..58b5a948048 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -95,6 +95,9 @@ public: subselect_engine *engine; /* unit of subquery */ st_select_lex_unit *unit; + /* Cached buffers used when calling filesort in sub queries */ + Filesort_buffer filesort_buffer; + LEX_STRING sortbuffer; /* A reference from inside subquery predicate to somewhere outside of it */ class Ref_to_outside : public Sql_alloc { diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 5b98ea801f0..139eb717748 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -29,6 +29,7 @@ #include <my_global.h> #include "sql_priv.h" #include "sql_select.h" +#include "uniques.h" /** Calculate the affordable RAM limit for structures like TREE or Unique diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 704f730d18a..4463c1d891c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -363,6 +363,7 @@ static bool volatile select_thread_in_use, signal_thread_in_use; static volatile bool ready_to_exit; static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0; static my_bool opt_short_log_format= 0, opt_silent_startup= 0; + uint kill_cached_threads; static uint wake_thread; ulong max_used_connections; @@ -389,6 +390,7 @@ static DYNAMIC_ARRAY all_options; bool opt_bin_log, opt_bin_log_used=0, opt_ignore_builtin_innodb= 0; my_bool opt_log, debug_assert_if_crashed_table= 0, opt_help= 0; +my_bool debug_assert_on_not_freed_memory= 0; my_bool disable_log_notes; static my_bool opt_abort; ulonglong log_output_options; @@ -4085,7 +4087,8 @@ static void my_malloc_size_cb_func(long 
long size, my_bool is_thread_specific) (longlong) thd->status_var.local_memory_used, size)); thd->status_var.local_memory_used+= size; - DBUG_ASSERT((longlong) thd->status_var.local_memory_used >= 0); + DBUG_ASSERT((longlong) thd->status_var.local_memory_used >= 0 || + !debug_assert_on_not_freed_memory); } } } @@ -6050,7 +6053,7 @@ int mysqld_main(int argc, char **argv) CloseHandle(hEventShutdown); } #endif -#if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) +#if (defined(HAVE_OPENSSL) && !defined(HAVE_YASSL)) && !defined(EMBEDDED_LIBRARY) ERR_remove_state(0); #endif mysqld_exit(0); @@ -7298,6 +7301,13 @@ struct my_option my_long_options[]= &opt_sporadic_binlog_dump_fail, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, #endif /* HAVE_REPLICATION */ +#ifndef DBUG_OFF + {"debug-assert-on-not-freed-memory", 0, + "Assert if we found problems with memory allocation", + &debug_assert_on_not_freed_memory, + &debug_assert_on_not_freed_memory, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, + 0}, +#endif /* DBUG_OFF */ /* default-storage-engine should have "MyISAM" as def_value. Instead of initializing it here it is done in init_common_variables() due to a compiler bug in Sun Studio compiler. */ diff --git a/sql/mysqld.h b/sql/mysqld.h index ed9b711a4d1..f69654660fd 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -119,6 +119,7 @@ extern bool opt_disable_networking, opt_skip_show_db; extern bool opt_skip_name_resolve; extern bool opt_ignore_builtin_innodb; extern my_bool opt_character_set_client_handshake; +extern my_bool debug_assert_on_not_freed_memory; extern bool volatile abort_loop; extern bool volatile in_bootstrap; extern uint connection_count; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index ed7e9a56ae5..7169a3eda81 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -114,12 +114,11 @@ #include "sql_parse.h" // check_stack_overrun #include "sql_partition.h" // get_part_id_func, PARTITION_ITERATOR, // struct partition_info, NOT_A_PARTITION_ID -#include "sql_base.h" // free_io_cache #include "records.h" // init_read_record, end_read_record #include <m_ctype.h> #include "sql_select.h" #include "sql_statistics.h" -#include "filesort.h" // filesort_free_buffers +#include "uniques.h" #ifndef EXTRA_DEBUG #define test_rb_tree(A,B) {} @@ -1154,6 +1153,7 @@ int imerge_list_and_tree(RANGE_OPT_PARAM *param, SQL_SELECT *make_select(TABLE *head, table_map const_tables, table_map read_tables, COND *conds, + SORT_INFO *filesort, bool allow_null_cond, int *error) { @@ -1174,13 +1174,16 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables, select->head=head; select->cond= conds; - if (head->sort.io_cache) + if (filesort && my_b_inited(&filesort->io_cache)) { - select->file= *head->sort.io_cache; + /* + Hijack the filesort io_cache for make_select + SQL_SELECT will be responsible for ensuring that it's properly freed. 
+ */ + select->file= filesort->io_cache; select->records=(ha_rows) (select->file.end_of_file/ head->file->ref_length); - my_free(head->sort.io_cache); - head->sort.io_cache=0; + my_b_clear(&filesort->io_cache); } DBUG_RETURN(select); } @@ -1393,7 +1396,6 @@ QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT() delete pk_quick_select; /* It's ok to call the next two even if they are already deinitialized */ end_read_record(&read_record); - free_io_cache(head); free_root(&alloc,MYF(0)); DBUG_VOID_RETURN; } @@ -10674,7 +10676,6 @@ int read_keys_and_merge_scans(THD *thd, else { unique->reset(); - filesort_free_buffers(head, false); } DBUG_ASSERT(file->ref_length == unique->get_size()); @@ -10727,7 +10728,7 @@ int read_keys_and_merge_scans(THD *thd, /* Ok all rowids are in the Unique now. The next call will initialize - head->sort structure so it can be used to iterate through the rowids + the unique structure so it can be used to iterate through the rowids sequence. */ result= unique->get(head); @@ -10736,7 +10737,8 @@ int read_keys_and_merge_scans(THD *thd, */ if (enabled_keyread) head->disable_keyread(); - if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, 1 , 1, TRUE)) + if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, + &unique->sort, 1 , 1, TRUE)) result= 1; DBUG_RETURN(result); @@ -10779,7 +10781,8 @@ int QUICK_INDEX_MERGE_SELECT::get_next() { result= HA_ERR_END_OF_FILE; end_read_record(&read_record); - free_io_cache(head); + // Free things used by sort early. Shouldn't be strictly necessary + unique->sort.reset(); /* All rows from Unique have been retrieved, do a clustered PK scan */ if (pk_quick_select) { @@ -10814,7 +10817,7 @@ int QUICK_INDEX_INTERSECT_SELECT::get_next() { result= HA_ERR_END_OF_FILE; end_read_record(&read_record); - free_io_cache(head); + unique->sort.reset(); // Free things early } DBUG_RETURN(result); @@ -14618,6 +14621,4 @@ void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose) } } - #endif /* !DBUG_OFF */ - diff --git a/sql/opt_range.h b/sql/opt_range.h index 80f4064a529..6970b87f6d8 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -26,6 +26,8 @@ #include "records.h" /* READ_RECORD */ #include "queues.h" /* QUEUE */ +#include "filesort.h" /* SORT_INFO */ + /* It is necessary to include set_var.h instead of item.h because there are dependencies on include order for set_var.h and item.h. 
This @@ -1658,6 +1660,7 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table, ha_rows records); SQL_SELECT *make_select(TABLE *head, table_map const_tables, table_map read_tables, COND *conds, + SORT_INFO* filesort, bool allow_null_cond, int *error); bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index c1052869c8f..e53e2a9ee0d 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4739,8 +4739,6 @@ int clear_sj_tmp_tables(JOIN *join) { if ((res= table->file->ha_delete_all_rows())) return res; /* purecov: inspected */ - free_io_cache(table); - filesort_free_buffers(table,0); } SJ_MATERIALIZATION_INFO *sjm; diff --git a/sql/records.cc b/sql/records.cc index ebda0ed35b0..3995bea6569 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -29,10 +29,10 @@ #include "records.h" #include "sql_priv.h" #include "records.h" -#include "filesort.h" // filesort_free_buffers #include "opt_range.h" // SQL_SELECT #include "sql_class.h" // THD #include "sql_base.h" +#include "sql_sort.h" // SORT_ADDON_FIELD static int rr_quick(READ_RECORD *info); int rr_sequential(READ_RECORD *info); @@ -182,26 +182,30 @@ bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, SQL_SELECT *select, + SORT_INFO *filesort, int use_record_cache, bool print_error, bool disable_rr_cache) { IO_CACHE *tempfile; + SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0; DBUG_ENTER("init_read_record"); bzero((char*) info,sizeof(*info)); info->thd=thd; info->table=table; info->forms= &info->table; /* Only one table */ + info->addon_field= addon_field; if ((table->s->tmp_table == INTERNAL_TMP_TABLE || table->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE) && - !table->sort.addon_field) + !addon_field) (void) table->file->extra(HA_EXTRA_MMAP); - if (table->sort.addon_field) + if (addon_field) { - info->rec_buf= table->sort.addon_buf; - info->ref_length= table->sort.addon_length; + info->rec_buf= (uchar*) filesort->addon_buf.str; + info->ref_length= filesort->addon_buf.length; + info->unpack= filesort->unpack; } else { @@ -213,19 +217,20 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, info->print_error=print_error; info->unlock_row= rr_unlock_row; info->ignore_not_found_rows= 0; - table->status=0; /* And it's always found */ + table->status= 0; /* Rows are always found */ + tempfile= 0; if (select && my_b_inited(&select->file)) tempfile= &select->file; - else - tempfile= table->sort.io_cache; - if (tempfile && my_b_inited(tempfile) && - !(select && select->quick)) + else if (filesort && my_b_inited(&filesort->io_cache)) + tempfile= &filesort->io_cache; + + if (tempfile && !(select && select->quick)) { DBUG_PRINT("info",("using rr_from_tempfile")); - info->read_record= (table->sort.addon_field ? + info->read_record= (addon_field ? 
rr_unpack_from_tempfile : rr_from_tempfile); - info->io_cache=tempfile; + info->io_cache= tempfile; reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0); info->ref_pos=table->file->ref; if (!table->file->inited) @@ -233,12 +238,12 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, DBUG_RETURN(1); /* - table->sort.addon_field is checked because if we use addon fields, + addon_field is checked because if we use addon fields, it doesn't make sense to use cache - we don't read from the table - and table->sort.io_cache is read sequentially + and filesort->io_cache is read sequentially */ if (!disable_rr_cache && - !table->sort.addon_field && + !addon_field && thd->variables.read_rnd_buff_size && !(table->file->ha_table_flags() & HA_FAST_KEY_READ) && (table->db_stat & HA_READ_ONLY || @@ -263,15 +268,15 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, DBUG_PRINT("info",("using rr_quick")); info->read_record=rr_quick; } - else if (table->sort.record_pointers) + else if (filesort && filesort->record_pointers) { DBUG_PRINT("info",("using record_pointers")); if (table->file->ha_rnd_init_with_error(0)) DBUG_RETURN(1); - info->cache_pos=table->sort.record_pointers; - info->cache_end=info->cache_pos+ - table->sort.found_records*info->ref_length; - info->read_record= (table->sort.addon_field ? + info->cache_pos= filesort->record_pointers; + info->cache_end= (info->cache_pos+ + filesort->return_rows * info->ref_length); + info->read_record= (addon_field ? rr_unpack_from_buffer : rr_from_pointers); } else @@ -288,7 +293,7 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, (use_record_cache < 0 && !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE)))) (void) table->file->extra_opt(HA_EXTRA_CACHE, - thd->variables.read_buff_size); + thd->variables.read_buff_size); } /* Condition pushdown to storage engine */ if ((table->file->ha_table_flags() & HA_CAN_TABLE_CONDITION_PUSHDOWN) && @@ -311,7 +316,6 @@ void end_read_record(READ_RECORD *info) } if (info->table) { - filesort_free_buffers(info->table,0); if (info->table->created) (void) info->table->file->extra(HA_EXTRA_NO_CACHE); if (info->read_record != rr_quick) // otherwise quick_range does it @@ -525,9 +529,8 @@ static int rr_unpack_from_tempfile(READ_RECORD *info) { if (my_b_read(info->io_cache, info->rec_buf, info->ref_length)) return -1; - TABLE *table= info->table; - (*table->sort.unpack)(table->sort.addon_field, info->rec_buf, - info->rec_buf + info->ref_length); + (*info->unpack)(info->addon_field, info->rec_buf, + info->rec_buf + info->ref_length); return 0; } @@ -577,11 +580,9 @@ static int rr_unpack_from_buffer(READ_RECORD *info) { if (info->cache_pos == info->cache_end) return -1; /* End of buffer */ - TABLE *table= info->table; - (*table->sort.unpack)(table->sort.addon_field, info->cache_pos, - info->cache_end); + (*info->unpack)(info->addon_field, info->cache_pos, + info->cache_end); info->cache_pos+= info->ref_length; - return 0; } /* cacheing of records from a database */ diff --git a/sql/records.h b/sql/records.h index a3f0b5eb084..1928acfd4f4 100644 --- a/sql/records.h +++ b/sql/records.h @@ -25,6 +25,7 @@ struct TABLE; class THD; class SQL_SELECT; class Copy_field; +class SORT_INFO; /** A context for reading through a single table using a chosen access method: @@ -60,8 +61,10 @@ struct READ_RECORD uchar *record; uchar *rec_buf; /* to read field values after filesort */ uchar *cache,*cache_pos,*cache_end,*read_positions; + struct st_sort_addon_field *addon_field; /* Pointer to the fields 
info */ struct st_io_cache *io_cache; bool print_error, ignore_not_found_rows; + void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); /* SJ-Materialization runtime may need to read fields from the materialized @@ -74,7 +77,8 @@ public: }; bool init_read_record(READ_RECORD *info, THD *thd, TABLE *reg_form, - SQL_SELECT *select, int use_record_cache, + SQL_SELECT *select, SORT_INFO *sort, + int use_record_cache, bool print_errors, bool disable_rr_cache); bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table, bool print_error, uint idx, bool reverse); diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 3ae25d5546b..3650256b5b7 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -1206,7 +1206,8 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) (void) my_init_dynamic_array(&acl_hosts,sizeof(ACL_HOST), 20, 50, MYF(0)); if ((table= tables[HOST_TABLE].table)) // "host" table may not exist (e.g. in MySQL 5.6.7+) { - if (init_read_record(&read_record_info, thd, table, NULL, 1, 1, FALSE)) + if (init_read_record(&read_record_info, thd, table, NULL, NULL, + 1, 1, FALSE)) goto end; table->use_all_columns(); while (!(read_record_info.read_record(&read_record_info))) @@ -1261,7 +1262,7 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) freeze_size(&acl_hosts); if (init_read_record(&read_record_info, thd, table=tables[USER_TABLE].table, - NULL, 1, 1, FALSE)) + NULL, NULL, 1, 1, FALSE)) goto end; table->use_all_columns(); (void) my_init_dynamic_array(&acl_users,sizeof(ACL_USER), 50, 100, MYF(0)); @@ -1523,7 +1524,7 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) freeze_size(&acl_users); if (init_read_record(&read_record_info, thd, table=tables[DB_TABLE].table, - NULL, 1, 1, FALSE)) + NULL, NULL, 1, 1, FALSE)) goto end; table->use_all_columns(); (void) my_init_dynamic_array(&acl_dbs,sizeof(ACL_DB), 50, 100, MYF(0)); @@ -1593,7 +1594,7 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) if ((table= tables[PROXIES_PRIV_TABLE].table)) { if (init_read_record(&read_record_info, thd, table, - NULL, 1, 1, FALSE)) + NULL, NULL, 1, 1, FALSE)) goto end; table->use_all_columns(); while (!(read_record_info.read_record(&read_record_info))) @@ -1622,7 +1623,8 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) if ((table= tables[ROLES_MAPPING_TABLE].table)) { - if (init_read_record(&read_record_info, thd, table, NULL, 1, 1, FALSE)) + if (init_read_record(&read_record_info, thd, table, NULL, NULL, 1, 1, + FALSE)) goto end; table->use_all_columns(); /* account for every role mapping */ diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 728fd44226e..3e3c4452d5a 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -349,7 +349,6 @@ void intern_close_table(TABLE *table) table->s ? table->s->table_name.str : "?", (long) table)); - free_io_cache(table); delete table->triggers; if (table->file) // Not true if placeholder (void) closefrm(table, 1); // close file @@ -359,21 +358,6 @@ void intern_close_table(TABLE *table) } -/* Free resources allocated by filesort() and read_record() */ - -void free_io_cache(TABLE *table) -{ - DBUG_ENTER("free_io_cache"); - if (table->sort.io_cache) - { - close_cached_file(table->sort.io_cache); - my_free(table->sort.io_cache); - table->sort.io_cache=0; - } - DBUG_VOID_RETURN; -} - - /** Auxiliary function which allows to kill delayed threads for particular table identified by its share. 
@@ -1812,7 +1796,6 @@ void close_temporary(TABLE *table, bool free_share, bool delete_table) DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'", table->s->db.str, table->s->table_name.str)); - free_io_cache(table); closefrm(table, 0); if (delete_table) rm_temporary_table(table_type, table->s->path.str); diff --git a/sql/sql_base.h b/sql/sql_base.h index ef249b3ab05..b6e135b6feb 100644 --- a/sql/sql_base.h +++ b/sql/sql_base.h @@ -267,7 +267,6 @@ bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags, uint dt_phases); bool lock_tables(THD *thd, TABLE_LIST *tables, uint counter, uint flags); int decide_logging_format(THD *thd, TABLE_LIST *tables); -void free_io_cache(TABLE *entry); void intern_close_table(TABLE *entry); void kill_delayed_threads_for_table(TDC_element *element); void close_thread_table(THD *thd, TABLE **table_ptr); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 63a300269dd..e3b70566597 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1706,8 +1706,9 @@ THD::~THD() if (status_var.local_memory_used != 0) { DBUG_PRINT("error", ("memory_used: %lld", status_var.local_memory_used)); - SAFEMALLOC_REPORT_MEMORY(my_thread_dbug_id()); - DBUG_ASSERT(status_var.local_memory_used == 0); + SAFEMALLOC_REPORT_MEMORY(thread_id); + DBUG_ASSERT(status_var.local_memory_used == 0 || + !debug_assert_on_not_freed_memory); } set_current_thd(orig_thd == this ? 0 : orig_thd); diff --git a/sql/sql_class.h b/sql/sql_class.h index 933815a00ce..82a56510736 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -5026,85 +5026,7 @@ class user_var_entry user_var_entry *get_variable(HASH *hash, LEX_STRING &name, bool create_if_not_exists); -/* - Unique -- class for unique (removing of duplicates). - Puts all values to the TREE. If the tree becomes too big, - it's dumped to the file. User can request sorted values, or - just iterate through them. In the last case tree merging is performed in - memory simultaneously with iteration, so it should be ~2-3x faster. 
- */ - -class Unique :public Sql_alloc -{ - DYNAMIC_ARRAY file_ptrs; - ulong max_elements; - ulonglong max_in_memory_size; - IO_CACHE file; - TREE tree; - uchar *record_pointers; - ulong filtered_out_elems; - bool flush(); - uint size; - uint full_size; - uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ - bool with_counters; - - bool merge(TABLE *table, uchar *buff, bool without_last_merge); - -public: - ulong elements; - Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, - uint size_arg, ulonglong max_in_memory_size_arg, - uint min_dupl_count_arg= 0); - ~Unique(); - ulong elements_in_tree() { return tree.elements_in_tree; } - inline bool unique_add(void *ptr) - { - DBUG_ENTER("unique_add"); - DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements)); - if (!(tree.flag & TREE_ONLY_DUPS) && - tree.elements_in_tree >= max_elements && flush()) - DBUG_RETURN(1); - DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); - } - - bool is_in_memory() { return (my_b_tell(&file) == 0); } - void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; } - - bool get(TABLE *table); - - /* Cost of searching for an element in the tree */ - inline static double get_search_cost(ulonglong tree_elems, uint compare_factor) - { - return log((double) tree_elems) / (compare_factor * M_LN2); - } - - static double get_use_cost(uint *buffer, size_t nkeys, uint key_size, - ulonglong max_in_memory_size, uint compare_factor, - bool intersect_fl, bool *in_memory); - inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size, - ulonglong max_in_memory_size) - { - register ulonglong max_elems_in_tree= - max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size); - return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree)); - } - - void reset(); - bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); - - uint get_size() const { return size; } - ulonglong get_max_in_memory_size() const { return max_in_memory_size; } - - friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); - friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); - - friend int unique_write_to_file_with_count(uchar* key, element_count count, - Unique *unique); - friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, - Unique *unique); -}; - +class SORT_INFO; class multi_delete :public select_result_interceptor { @@ -5132,7 +5054,7 @@ public: int send_data(List<Item> &items); bool initialize_tables (JOIN *join); int do_deletes(); - int do_table_deletes(TABLE *table, bool ignore); + int do_table_deletes(TABLE *table, SORT_INFO *sort_info, bool ignore); bool send_eof(); inline ha_rows num_deleted() { diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index f49a053918b..42e7f6c3569 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -40,6 +40,8 @@ #include "sql_statistics.h" #include "transaction.h" #include "records.h" // init_read_record, +#include "filesort.h" +#include "uniques.h" #include "sql_derived.h" // mysql_handle_list_of_derived // end_read_record #include "sql_partition.h" // make_used_partitions_str @@ -227,10 +229,12 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, int error, loc_error; TABLE *table; SQL_SELECT *select=0; + SORT_INFO *file_sort= 0; READ_RECORD info; bool using_limit=limit != HA_POS_ERROR; bool transactional_table, safe_update, const_cond; bool const_cond_result; + bool return_error= 0; ha_rows deleted= 0; bool reverse= FALSE; ORDER *order= (ORDER *) 
((order_list && order_list->elements) ? @@ -405,7 +409,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, table->covering_keys.clear_all(); table->quick_keys.clear_all(); // Can't use 'only index' - select=make_select(table, 0, 0, conds, 0, &error); + select=make_select(table, 0, 0, conds, (SORT_INFO*) 0, 0, &error); if (error) DBUG_RETURN(TRUE); if ((select && select->check_quick(thd, safe_update, limit)) || !limit) @@ -486,32 +490,21 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (query_plan.using_filesort) { - ha_rows examined_rows; - ha_rows found_rows; uint length= 0; SORT_FIELD *sortorder; { DBUG_ASSERT(query_plan.index == MAX_KEY); - table->sort.io_cache= (IO_CACHE *) my_malloc(sizeof(IO_CACHE), - MYF(MY_FAE | MY_ZEROFILL | - MY_THREAD_SPECIFIC)); Filesort_tracker *fs_tracker= thd->lex->explain->get_upd_del_plan()->filesort_tracker; if (!(sortorder= make_unireg_sortorder(thd, order, &length, NULL)) || - (table->sort.found_records= filesort(thd, table, sortorder, length, - select, HA_POS_ERROR, - true, - &examined_rows, &found_rows, - fs_tracker)) - == HA_POS_ERROR) - { - delete select; - free_underlaid_joins(thd, &thd->lex->select_lex); - DBUG_RETURN(TRUE); - } - thd->inc_examined_row_count(examined_rows); + !(file_sort= filesort(thd, table, sortorder, length, + select, HA_POS_ERROR, + true, + fs_tracker))) + goto got_error; + thd->inc_examined_row_count(file_sort->examined_rows); /* Filesort has already found and selected the rows we want to delete, so we don't need the where clause @@ -524,24 +517,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, /* If quick select is used, initialize it before retrieving rows. */ if (select && select->quick && select->quick->reset()) - { - delete select; - free_underlaid_joins(thd, select_lex); - DBUG_RETURN(TRUE); - } + goto got_error; if (query_plan.index == MAX_KEY || (select && select->quick)) - error= init_read_record(&info, thd, table, select, 1, 1, FALSE); + error= init_read_record(&info, thd, table, select, file_sort, 1, 1, FALSE); else error= init_read_record_idx(&info, thd, table, 1, query_plan.index, reverse); if (error) - { - delete select; - free_underlaid_joins(thd, select_lex); - DBUG_RETURN(TRUE); - } - + goto got_error; + init_ftfuncs(thd, select_lex, 1); THD_STAGE_INFO(thd, stage_updating); @@ -697,8 +682,6 @@ cleanup: } DBUG_ASSERT(transactional_table || !deleted || thd->transaction.stmt.modified_non_trans_table); - - free_underlaid_joins(thd, select_lex); if (error < 0 || (thd->lex->ignore && !thd->is_error() && !thd->is_fatal_error)) { @@ -711,6 +694,8 @@ cleanup: my_ok(thd, deleted); DBUG_PRINT("info",("%ld records deleted",(long) deleted)); } + delete file_sort; + free_underlaid_joins(thd, select_lex); DBUG_RETURN(error >= 0 || thd->is_error()); /* Special exits */ @@ -729,9 +714,16 @@ send_nothing_and_leave: */ delete select; + delete file_sort; free_underlaid_joins(thd, select_lex); //table->set_keyread(false); - DBUG_RETURN((thd->is_error() || thd->killed) ? 1 : 0); + + DBUG_ASSERT(!return_error || thd->is_error() || thd->killed); + DBUG_RETURN((return_error || thd->is_error() || thd->killed) ? 
1 : 0); + +got_error: + return_error= 1; + goto send_nothing_and_leave; } @@ -1183,7 +1175,8 @@ int multi_delete::do_deletes() if (tempfiles[counter]->get(table)) DBUG_RETURN(1); - local_error= do_table_deletes(table, thd->lex->ignore); + local_error= do_table_deletes(table, &tempfiles[counter]->sort, + thd->lex->ignore); if (thd->killed && !local_error) DBUG_RETURN(1); @@ -1213,14 +1206,15 @@ int multi_delete::do_deletes() @retval 1 Triggers or handler reported error. @retval -1 End of file from handler. */ -int multi_delete::do_table_deletes(TABLE *table, bool ignore) +int multi_delete::do_table_deletes(TABLE *table, SORT_INFO *sort_info, + bool ignore) { int local_error= 0; READ_RECORD info; ha_rows last_deleted= deleted; DBUG_ENTER("do_deletes_for_table"); - if (init_read_record(&info, thd, table, NULL, 0, 1, FALSE)) + if (init_read_record(&info, thd, table, NULL, sort_info, 0, 1, FALSE)) DBUG_RETURN(1); /* diff --git a/sql/sql_help.cc b/sql/sql_help.cc index a0e836da203..a50b90fc111 100644 --- a/sql/sql_help.cc +++ b/sql/sql_help.cc @@ -194,7 +194,8 @@ int search_topics(THD *thd, TABLE *topics, struct st_find_field *find_fields, DBUG_ENTER("search_topics"); /* Should never happen. As this is part of help, we can ignore this */ - if (init_read_record(&read_record_info, thd, topics, select, 1, 0, FALSE)) + if (init_read_record(&read_record_info, thd, topics, select, NULL, 1, 0, + FALSE)) DBUG_RETURN(0); while (!read_record_info.read_record(&read_record_info)) @@ -229,14 +230,16 @@ int search_topics(THD *thd, TABLE *topics, struct st_find_field *find_fields, 2 found more then one topic matching the mask */ -int search_keyword(THD *thd, TABLE *keywords, struct st_find_field *find_fields, +int search_keyword(THD *thd, TABLE *keywords, + struct st_find_field *find_fields, SQL_SELECT *select, int *key_id) { int count= 0; READ_RECORD read_record_info; DBUG_ENTER("search_keyword"); /* Should never happen. As this is part of help, we can ignore this */ - if (init_read_record(&read_record_info, thd, keywords, select, 1, 0, FALSE)) + if (init_read_record(&read_record_info, thd, keywords, select, NULL, 1, 0, + FALSE)) DBUG_RETURN(0); while (!read_record_info.read_record(&read_record_info) && count<2) @@ -370,7 +373,8 @@ int search_categories(THD *thd, TABLE *categories, DBUG_ENTER("search_categories"); /* Should never happen. As this is part of help, we can ignore this */ - if (init_read_record(&read_record_info, thd, categories, select,1,0,FALSE)) + if (init_read_record(&read_record_info, thd, categories, select, NULL, + 1, 0, FALSE)) DBUG_RETURN(0); while (!read_record_info.read_record(&read_record_info)) { @@ -406,7 +410,8 @@ void get_all_items_for_category(THD *thd, TABLE *items, Field *pfname, DBUG_ENTER("get_all_items_for_category"); /* Should never happen. 
As this is part of help, we can ignore this */ - if (init_read_record(&read_record_info, thd, items, select,1,0,FALSE)) + if (init_read_record(&read_record_info, thd, items, select, NULL, 1, 0, + FALSE)) DBUG_VOID_RETURN; while (!read_record_info.read_record(&read_record_info)) @@ -608,7 +613,7 @@ SQL_SELECT *prepare_simple_select(THD *thd, Item *cond, /* Assume that no indexes cover all required fields */ table->covering_keys.clear_all(); - SQL_SELECT *res= make_select(table, 0, 0, cond, 0, error); + SQL_SELECT *res= make_select(table, 0, 0, cond, 0, 0, error); if (*error || (res && res->check_quick(thd, 0, HA_POS_ERROR)) || (res && res->quick && res->quick->reset())) { diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 9a59764f425..dbe19674cf2 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -1796,7 +1796,8 @@ static void plugin_load(MEM_ROOT *tmp_root) goto end; } - if (init_read_record(&read_record_info, new_thd, table, NULL, 1, 0, FALSE)) + if (init_read_record(&read_record_info, new_thd, table, NULL, NULL, 1, 0, + FALSE)) { sql_print_error("Could not initialize init_read_record; Plugins not " "loaded"); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index cbac7c1bcc2..6ecabc24319 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1435,7 +1435,7 @@ JOIN::optimize_inner() } select= make_select(*table, const_table_map, - const_table_map, conds, 1, &error); + const_table_map, conds, (SORT_INFO*) 0, 1, &error); if (error) { /* purecov: inspected */ error= -1; /* purecov: inspected */ @@ -2373,15 +2373,11 @@ JOIN::reinit() { exec_tmp_table1->file->extra(HA_EXTRA_RESET_STATE); exec_tmp_table1->file->ha_delete_all_rows(); - free_io_cache(exec_tmp_table1); - filesort_free_buffers(exec_tmp_table1,0); } if (exec_tmp_table2) { exec_tmp_table2->file->extra(HA_EXTRA_RESET_STATE); exec_tmp_table2->file->ha_delete_all_rows(); - free_io_cache(exec_tmp_table2); - filesort_free_buffers(exec_tmp_table2,0); } clear_sj_tmp_tables(this); if (items0) @@ -3198,12 +3194,12 @@ void JOIN::exec_inner() DBUG_VOID_RETURN; sortorder= curr_join->sortorder; if (curr_join->const_tables != curr_join->table_count && - !curr_join->join_tab[curr_join->const_tables].table->sort.io_cache) + !curr_join->join_tab[curr_join->const_tables].filesort) { /* - If no IO cache exists for the first table then we are using an - INDEX SCAN and no filesort. Thus we should not remove the sorted - attribute on the INDEX SCAN. + If no filesort for the first table then we are using an + INDEX SCAN. Thus we should not remove the sorted attribute + on the INDEX SCAN. */ skip_sort_order= 1; } @@ -4100,6 +4096,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, select= make_select(s->table, found_const_table_map, found_const_table_map, *s->on_expr_ref ? *s->on_expr_ref : join->conds, + (SORT_INFO*) 0, 1, &error); if (!select) goto error; @@ -9048,13 +9045,21 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) /* Reuse TABLE * and JOIN_TAB if already allocated by a previous call to this function through JOIN::exec (may happen for sub-queries). + + psergey-todo: here, save the pointer for original join_tabs. 
*/ - if (!parent->join_tab_reexec && - !(parent->join_tab_reexec= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ + if (!(join_tab= parent->join_tab_reexec)) + { + if (!(join_tab= parent->join_tab_reexec= + (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + } + else + { + /* Free memory used by previous allocations */ + delete join_tab->filesort; + } - // psergey-todo: here, save the pointer for original join_tabs. - join_tab= parent->join_tab_reexec; table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table; table_count= top_join_tab_count= 1; @@ -11417,13 +11422,16 @@ bool error_if_full_join(JOIN *join) void JOIN_TAB::cleanup() { DBUG_ENTER("JOIN_TAB::cleanup"); - DBUG_PRINT("enter", ("table %s.%s", + DBUG_PRINT("enter", ("tab: %p table %s.%s", + this, (table ? table->s->db.str : "?"), (table ? table->s->table_name.str : "?"))); delete select; select= 0; delete quick; quick= 0; + delete filesort; + filesort= 0; if (cache) { cache->free(); @@ -11822,8 +11830,8 @@ void JOIN::cleanup(bool full) JOIN_TAB *first_tab= first_top_level_tab(this, WITHOUT_CONST_TABLES); if (first_tab->table) { - free_io_cache(first_tab->table); - filesort_free_buffers(first_tab->table, full); + delete first_tab->filesort; + first_tab->filesort= 0; } } if (full) @@ -17598,7 +17606,6 @@ free_tmp_table(THD *thd, TABLE *entry) /* free blobs */ for (Field **ptr=entry->field ; *ptr ; ptr++) (*ptr)->free(); - free_io_cache(entry); if (entry->temp_pool_slot != MY_BIT_NONE) bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot); @@ -19061,7 +19068,7 @@ int join_init_read_record(JOIN_TAB *tab) if (!tab->preread_init_done && tab->preread_init()) return 1; if (init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select,1,1, FALSE)) + tab->select, tab->filesort, 1,1, FALSE)) return 1; return (*tab->read_record.read_record)(&tab->read_record); } @@ -19079,7 +19086,7 @@ join_read_record_no_init(JOIN_TAB *tab) save_copy_end= tab->read_record.copy_field_end; init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select,1,1, FALSE); + tab->select, tab->filesort, 1, 1, FALSE); tab->read_record.copy_field= save_copy; tab->read_record.copy_field_end= save_copy_end; @@ -19324,11 +19331,9 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), TABLE *table=jt->table; join->select_options ^= OPTION_FOUND_ROWS; - if (table->sort.record_pointers || - (table->sort.io_cache && my_b_inited(table->sort.io_cache))) + if (jt->filesort) // If filesort was used { - /* Using filesort */ - join->send_records= table->sort.found_records; + join->send_records= jt->filesort->found_rows; } else { @@ -21058,8 +21063,7 @@ use_filesort: 'join' is modified to use this index. - If no index, create with filesort() an index file that can be used to retrieve rows in order (should be done with 'read_record'). - The sorted data is stored in tab->table and will be freed when calling - free_io_cache(tab->table). 
+ The sorted data is stored in tab->filesort RETURN VALUES 0 ok @@ -21072,15 +21076,12 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, ha_rows filesort_limit, ha_rows select_limit, bool is_order_by) { - uint length= 0; - ha_rows examined_rows; - ha_rows found_rows; - ha_rows filesort_retval= HA_POS_ERROR; + uint length; TABLE *table; SQL_SELECT *select; JOIN_TAB *tab; - int err= 0; bool quick_created= FALSE; + SORT_INFO *file_sort= 0; DBUG_ENTER("create_sort_index"); if (join->table_count == join->const_tables) @@ -21165,15 +21166,19 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, } tab->update_explain_data(join->const_tables); + /* + Calculate length of join->order as this may be longer than 'order', + which may come from 'group by'. This is needed as join->sortorder is + used both for grouping and ordering. + */ + length= 0; for (ORDER *ord= join->order; ord; ord= ord->next) length++; - if (!(join->sortorder= + + if (!(join->sortorder= make_unireg_sortorder(thd, order, &length, join->sortorder))) goto err; /* purecov: inspected */ - table->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE), - MYF(MY_WME | MY_ZEROFILL| - MY_THREAD_SPECIFIC)); table->status=0; // May be wrong if quick_select if (!tab->preread_init_done && tab->preread_init()) @@ -21217,12 +21222,18 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, if (table->s->tmp_table) table->file->info(HA_STATUS_VARIABLE); // Get record count - filesort_retval= filesort(thd, table, join->sortorder, length, - select, filesort_limit, 0, - &examined_rows, &found_rows, - join->explain->ops_tracker.report_sorting(thd)); - table->sort.found_records= filesort_retval; - tab->records= join->select_options & OPTION_FOUND_ROWS ? found_rows : filesort_retval; + file_sort= filesort(thd, table, join->sortorder, length, + select, filesort_limit, 0, + join->explain->ops_tracker.report_sorting(thd)); + DBUG_ASSERT(tab->filesort == 0); + tab->filesort= file_sort; + tab->records= 0; + if (file_sort) + { + tab->records= join->select_options & OPTION_FOUND_ROWS ? + file_sort->found_rows : file_sort->return_rows; + tab->join->join_examined_rows+= file_sort->examined_rows; + } if (quick_created) { @@ -21245,12 +21256,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order, tab->type=JT_ALL; // Read with normal read_record tab->read_first_record= join_init_read_record; tab->table->file->ha_index_or_rnd_end(); - - if (err) - goto err; - tab->join->join_examined_rows+= examined_rows; - DBUG_RETURN(filesort_retval == HA_POS_ERROR); + DBUG_RETURN(file_sort == 0); err: DBUG_RETURN(-1); } @@ -21373,7 +21380,6 @@ remove_duplicates(JOIN *join, TABLE *table, List<Item> &fields, Item *having) if (thd->killed == ABORT_QUERY) thd->reset_killed(); - free_io_cache(table); // Safety table->file->info(HA_STATUS_VARIABLE); if (table->s->db_type() == heap_hton || (!table->s->blob_fields && @@ -23136,8 +23142,8 @@ static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab) } join_tab->set_select_cond(cond, __LINE__); } - else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, 0, - &error))) + else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, + (SORT_INFO*) 0, 0, &error))) join_tab->set_select_cond(cond, __LINE__); DBUG_RETURN(error ? 
TRUE : FALSE); diff --git a/sql/sql_select.h b/sql/sql_select.h index 95550f56450..e20cf6b403c 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -32,6 +32,7 @@ #include "sql_array.h" /* Array */ #include "records.h" /* READ_RECORD */ #include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */ +#include "filesort.h" /* Values in optimize */ #define KEY_OPTIMIZE_EXISTS 1 @@ -236,6 +237,7 @@ typedef struct st_join_table { For join tabs that are inside an SJM bush: root of the bush */ st_join_table *bush_root_tab; + SORT_INFO *filesort; /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ bool last_leaf_in_bush; diff --git a/sql/sql_servers.cc b/sql/sql_servers.cc index 0138c3e5a3b..196c138c04d 100644 --- a/sql/sql_servers.cc +++ b/sql/sql_servers.cc @@ -205,8 +205,8 @@ static bool servers_load(THD *thd, TABLE_LIST *tables) free_root(&mem, MYF(0)); init_sql_alloc(&mem, ACL_ALLOC_BLOCK_SIZE, 0, MYF(0)); - if (init_read_record(&read_record_info,thd,table=tables[0].table,NULL,1,0, - FALSE)) + if (init_read_record(&read_record_info,thd,table=tables[0].table, NULL, NULL, + 1,0, FALSE)) DBUG_RETURN(1); while (!(read_record_info.read_record(&read_record_info))) { diff --git a/sql/sql_show.cc b/sql/sql_show.cc index c64c242c1c2..7ca45e79493 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -39,7 +39,6 @@ #include "tztime.h" // struct Time_zone #include "sql_acl.h" // TABLE_ACLS, check_grant, DB_ACLS, acl_get, // check_grant_db -#include "filesort.h" // filesort_free_buffers #include "sp.h" #include "sp_head.h" #include "sp_pcontext.h" @@ -8066,8 +8065,6 @@ bool get_schema_tables_result(JOIN *join, table_list->table->file->extra(HA_EXTRA_NO_CACHE); table_list->table->file->extra(HA_EXTRA_RESET_STATE); table_list->table->file->ha_delete_all_rows(); - free_io_cache(table_list->table); - filesort_free_buffers(table_list->table,1); table_list->table->null_row= 0; } else diff --git a/sql/sql_sort.h b/sql/sql_sort.h index 1622d9df360..6c97ad7e9ab 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -16,8 +16,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "m_string.h" /* memset */ -#include "my_global.h" /* uchar */ #include "my_base.h" /* ha_rows */ #include "my_sys.h" /* qsort2_cmp */ #include "queues.h" @@ -71,7 +69,6 @@ public: uint rec_length; // Length of sorted records. uint sort_length; // Length of sorted columns. uint ref_length; // Length of record ref. - uint addon_length; // Length of added packed fields. uint res_length; // Length of records in final sorted file/buffer. uint max_keys_per_buffer; // Max keys / buffer. uint min_dupl_count; @@ -81,6 +78,8 @@ public: SORT_FIELD *local_sortorder; SORT_FIELD *end; SORT_ADDON_FIELD *addon_field; // Descriptors for companion fields. + LEX_STRING addon_buf; // Buffer & length of added packed fields. 
+ uchar *unique_buff; bool not_killable; char* tmp_buffer; diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 324741fb55e..f6811b020eb 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -28,6 +28,7 @@ #include "key.h" #include "sql_statistics.h" #include "opt_range.h" +#include "uniques.h" #include "my_atomic.h" /* diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 7f3b4fe6595..dad51139af3 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -9351,15 +9351,14 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, int error= 1; Copy_field *copy= NULL, *copy_end; ha_rows found_count= 0, delete_count= 0; - uint length= 0; SORT_FIELD *sortorder; + SORT_INFO *file_sort= 0; READ_RECORD info; TABLE_LIST tables; List<Item> fields; List<Item> all_fields; - ha_rows examined_rows; - ha_rows found_rows; bool auto_increment_field_copied= 0; + bool init_read_record_done= 0; ulonglong save_sql_mode= thd->variables.sql_mode; ulonglong prev_insert_id, time_to_report_progress; Field **dfield_ptr= to->default_field; @@ -9442,9 +9441,7 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, } else { - from->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE), - MYF(MY_FAE | MY_ZEROFILL | - MY_THREAD_SPECIFIC)); + uint length= 0; bzero((char *) &tables, sizeof(tables)); tables.table= from; tables.alias= tables.table_name= from->s->table_name.str; @@ -9456,12 +9453,10 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, setup_order(thd, thd->lex->select_lex.ref_pointer_array, &tables, fields, all_fields, order) || !(sortorder= make_unireg_sortorder(thd, order, &length, NULL)) || - (from->sort.found_records= filesort(thd, from, sortorder, length, - NULL, HA_POS_ERROR, - true, - &examined_rows, &found_rows, - &dummy_tracker)) == - HA_POS_ERROR) + !(file_sort= filesort(thd, from, sortorder, length, + NULL, HA_POS_ERROR, + true, + &dummy_tracker))) goto err; } thd_progress_next_stage(thd); @@ -9471,8 +9466,10 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, /* Tell handler that we have values for all columns in the to table */ to->use_all_columns(); to->mark_virtual_columns_for_write(TRUE); - if (init_read_record(&info, thd, from, (SQL_SELECT *) 0, 1, 1, FALSE)) + if (init_read_record(&info, thd, from, (SQL_SELECT *) 0, file_sort, 1, 1, + FALSE)) goto err; + init_read_record_done= 1; if (ignore && !alter_ctx->fk_error_if_delete_row) to->file->extra(HA_EXTRA_IGNORE_DUP_KEY); @@ -9587,9 +9584,6 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, found_count++; thd->get_stmt_da()->inc_current_row_for_warning(); } - end_read_record(&info); - free_io_cache(from); - delete [] copy; THD_STAGE_INFO(thd, stage_enabling_keys); thd_progress_next_stage(thd); @@ -9610,6 +9604,12 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, error= 1; err: + /* Free resources */ + if (init_read_record_done) + end_read_record(&info); + delete [] copy; + delete file_sort; + thd->variables.sql_mode= save_sql_mode; thd->abort_on_warning= 0; *copied= found_count; diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc index 0b294b5af8c..502bc88c489 100644 --- a/sql/sql_udf.cc +++ b/sql/sql_udf.cc @@ -180,7 +180,8 @@ void udf_init() } table= tables.table; - if (init_read_record(&read_record_info, new_thd, table, NULL,1,0,FALSE)) + if (init_read_record(&read_record_info, new_thd, table, NULL, NULL, 1, 0, + FALSE)) { sql_print_error("Could not initialize init_read_record; udf's not " "loaded"); diff --git a/sql/sql_union.cc b/sql/sql_union.cc index 
069eadc7519..6c85840a5d1 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -173,7 +173,8 @@ select_union::create_result_table(THD *thd_arg, List<Item> *column_types, /** - Reset and empty the temporary table that stores the materialized query result. + Reset and empty the temporary table that stores the materialized query + result. @note The cleanup performed here is exactly the same as for the two temp tables of JOIN - exec_tmp_table_[1 | 2]. @@ -183,8 +184,6 @@ void select_union::cleanup() { table->file->extra(HA_EXTRA_RESET_STATE); table->file->ha_delete_all_rows(); - free_io_cache(table); - filesort_free_buffers(table,0); } diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 55e5cf2c526..47206d75f1b 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -270,6 +270,7 @@ int mysql_update(THD *thd, key_map old_covering_keys; TABLE *table; SQL_SELECT *select= NULL; + SORT_INFO *file_sort= 0; READ_RECORD info; SELECT_LEX *select_lex= &thd->lex->select_lex; ulonglong id; @@ -420,7 +421,7 @@ int mysql_update(THD *thd, table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); set_statistics_for_table(thd, table); - select= make_select(table, 0, 0, conds, 0, &error); + select= make_select(table, 0, 0, conds, (SORT_INFO*) 0, 0, &error); if (error || !limit || thd->is_error() || (select && select->check_quick(thd, safe_update, limit))) { @@ -558,26 +559,18 @@ int mysql_update(THD *thd, */ uint length= 0; SORT_FIELD *sortorder; - ha_rows examined_rows; - ha_rows found_rows; - table->sort.io_cache = (IO_CACHE *) my_malloc(sizeof(IO_CACHE), - MYF(MY_FAE | MY_ZEROFILL | - MY_THREAD_SPECIFIC)); Filesort_tracker *fs_tracker= thd->lex->explain->get_upd_del_plan()->filesort_tracker; if (!(sortorder=make_unireg_sortorder(thd, order, &length, NULL)) || - (table->sort.found_records= filesort(thd, table, sortorder, length, - select, limit, - true, - &examined_rows, &found_rows, - fs_tracker)) - == HA_POS_ERROR) - { + !(file_sort= filesort(thd, table, sortorder, length, + select, limit, + true, + fs_tracker))) goto err; - } - thd->inc_examined_row_count(examined_rows); + thd->inc_examined_row_count(file_sort->examined_rows); + /* Filesort has already found and selected the rows we want to update, so we don't need the where clause @@ -618,7 +611,7 @@ int mysql_update(THD *thd, */ if (query_plan.index == MAX_KEY || (select && select->quick)) - error= init_read_record(&info, thd, table, select, 0, 1, FALSE); + error= init_read_record(&info, thd, table, select, NULL, 0, 1, FALSE); else error= init_read_record_idx(&info, thd, table, 1, query_plan.index, reverse); @@ -662,8 +655,9 @@ int mysql_update(THD *thd, else { /* - Don't try unlocking the row if skip_record reported an error since in - this case the transaction might have been rolled back already. + Don't try unlocking the row if skip_record reported an + error since in this case the transaction might have been + rolled back already. 
*/ if (error < 0) { @@ -712,7 +706,7 @@ int mysql_update(THD *thd, if (select && select->quick && select->quick->reset()) goto err; table->file->try_semi_consistent_read(1); - if (init_read_record(&info, thd, table, select, 0, 1, FALSE)) + if (init_read_record(&info, thd, table, select, file_sort, 0, 1, FALSE)) goto err; updated= found= 0; @@ -1020,6 +1014,7 @@ int mysql_update(THD *thd, } DBUG_ASSERT(transactional_table || !updated || thd->transaction.stmt.modified_non_trans_table); free_underlaid_joins(thd, select_lex); + delete file_sort; /* If LAST_INSERT_ID(X) was used, report X */ id= thd->arg_of_last_insert_id_function ? @@ -1053,6 +1048,7 @@ int mysql_update(THD *thd, err: delete select; + delete file_sort; free_underlaid_joins(thd, select_lex); table->disable_keyread(); thd->abort_on_warning= 0; diff --git a/sql/table.h b/sql/table.h index 5d27d8e97fd..46309d43ad6 100644 --- a/sql/table.h +++ b/sql/table.h @@ -322,55 +322,6 @@ enum enum_vcol_update_mode VCOL_UPDATE_ALL }; -class Filesort_info -{ - /// Buffer for sorting keys. - Filesort_buffer filesort_buffer; - -public: - IO_CACHE *io_cache; /* If sorted through filesort */ - uchar *buffpek; /* Buffer for buffpek structures */ - uint buffpek_len; /* Max number of buffpeks in the buffer */ - uchar *addon_buf; /* Pointer to a buffer if sorted with fields */ - size_t addon_length; /* Length of the buffer */ - struct st_sort_addon_field *addon_field; /* Pointer to the fields info */ - void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); /* To unpack back */ - uchar *record_pointers; /* If sorted in memory */ - ha_rows found_records; /* How many records in sort */ - - /** Sort filesort_buffer */ - void sort_buffer(Sort_param *param, uint count) - { filesort_buffer.sort_buffer(param, count); } - - /** - Accessors for Filesort_buffer (which @c). - */ - uchar *get_record_buffer(uint idx) - { return filesort_buffer.get_record_buffer(idx); } - - uchar **get_sort_keys() - { return filesort_buffer.get_sort_keys(); } - - uchar **alloc_sort_buffer(uint num_records, uint record_length) - { return filesort_buffer.alloc_sort_buffer(num_records, record_length); } - - bool check_sort_buffer_properties(uint num_records, uint record_length) - { - return filesort_buffer.check_sort_buffer_properties(num_records, - record_length); - } - - void free_sort_buffer() - { filesort_buffer.free_sort_buffer(); } - - void init_record_pointers() - { filesort_buffer.init_record_pointers(); } - - size_t sort_buffer_size() const - { return filesort_buffer.sort_buffer_size(); } -}; - - class Field_blob; class Table_triggers_list; @@ -1246,7 +1197,6 @@ public: REGINFO reginfo; /* field connections */ MEM_ROOT mem_root; GRANT_INFO grant; - Filesort_info sort; /* The arena which the items for expressions from the table definition are associated with. 
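The hunks above (sql_delete.cc, sql_update.cc, sql_table.cc, and the removal of Filesort_info from TABLE in table.h) all converge on one caller pattern: filesort() now returns a heap-allocated SORT_INFO that the caller owns, passes to init_read_record(), and deletes once the scan is done, instead of stashing results in TABLE->sort. A minimal C++ sketch of that pattern follows; the helper name do_sorted_scan and the simplified error handling are illustrative assumptions, while the filesort()/init_read_record() signatures are taken from the hunks shown.

#include "filesort.h"   // filesort(), SORT_INFO
#include "records.h"    // init_read_record(), end_read_record()

/* Hypothetical helper showing the post-patch ownership of SORT_INFO */
static bool do_sorted_scan(THD *thd, TABLE *table, SQL_SELECT *select,
                           SORT_FIELD *sortorder, uint length,
                           Filesort_tracker *tracker)
{
  READ_RECORD info;
  SORT_INFO *file_sort;

  /* filesort() now returns NULL on error instead of HA_POS_ERROR */
  if (!(file_sort= filesort(thd, table, sortorder, length,
                            select, HA_POS_ERROR, true, tracker)))
    return true;
  thd->inc_examined_row_count(file_sort->examined_rows);

  /* The sort result is passed explicitly; TABLE->sort no longer exists */
  if (init_read_record(&info, thd, table, select, file_sort, 1, 1, FALSE))
  {
    delete file_sort;
    return true;
  }
  while (!info.read_record(&info))
  {
    /* process one row in sorted order */
  }
  end_read_record(&info);
  delete file_sort;   /* replaces free_io_cache() + filesort_free_buffers() */
  return false;
}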
diff --git a/sql/uniques.cc b/sql/uniques.cc index 63eb6e0eb90..f2fa0bf7b1a 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -37,7 +37,9 @@ #include "sql_sort.h" #include "queues.h" // QUEUE #include "my_tree.h" // element_count -#include "sql_class.h" // Unique +#include "uniques.h" // Unique +#include "sql_sort.h" +#include "myisamchk.h" // BUFFPEK int unique_write_to_file(uchar* key, element_count count, Unique *unique) { @@ -58,8 +60,8 @@ int unique_write_to_file_with_count(uchar* key, element_count count, Unique *uni int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) { - memcpy(unique->record_pointers, key, unique->size); - unique->record_pointers+=unique->size; + memcpy(unique->sort.record_pointers, key, unique->size); + unique->sort.record_pointers+=unique->size; return 0; } @@ -67,8 +69,8 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq { if (count >= unique->min_dupl_count) { - memcpy(unique->record_pointers, key, unique->size); - unique->record_pointers+=unique->size; + memcpy(unique->sort.record_pointers, key, unique->size); + unique->sort.record_pointers+=unique->size; } else unique->filtered_out_elems++; @@ -80,16 +82,15 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, uint size_arg, ulonglong max_in_memory_size_arg, uint min_dupl_count_arg) :max_in_memory_size(max_in_memory_size_arg), - record_pointers(NULL), size(size_arg), elements(0) { + my_b_clear(&file); min_dupl_count= min_dupl_count_arg; full_size= size; if (min_dupl_count_arg) full_size+= sizeof(element_count); with_counters= MY_TEST(min_dupl_count_arg); - my_b_clear(&file); init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); /* If the following fail's the next add will also fail */ @@ -408,8 +409,10 @@ Unique::reset() reset_dynamic(&file_ptrs); reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1); } + my_free(sort.record_pointers); elements= 0; tree.flag= 0; + sort.record_pointers= 0; } /* @@ -636,7 +639,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) if (elements == 0) /* the whole tree is in memory */ return tree_walk(&tree, action, walk_action_arg, left_root_right); - table->sort.found_records=elements+tree.elements_in_tree; + sort.return_rows= elements+tree.elements_in_tree; /* flush current tree to the file to have some memory for merge buffer */ if (flush()) return 1; @@ -663,9 +666,11 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) /* DESCRIPTION - Perform multi-pass sort merge of the elements accessed through table->sort, - using the buffer buff as the merge buffer. The last pass is not performed - if without_last_merge is TRUE. + + Perform multi-pass sort merge of the elements using the buffer buff as + the merge buffer. The last pass is not performed if without_last_merge is + TRUE. 
+ SYNOPSIS Unique:merge() All params are 'IN': @@ -679,23 +684,19 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) bool Unique::merge(TABLE *table, uchar *buff, bool without_last_merge) { - IO_CACHE *outfile= table->sort.io_cache; + IO_CACHE *outfile= &sort.io_cache; BUFFPEK *file_ptr= (BUFFPEK*) file_ptrs.buffer; uint maxbuffer= file_ptrs.elements - 1; my_off_t save_pos; bool error= 1; + Sort_param sort_param; - /* Open cached file if it isn't open */ - if (!outfile) - outfile= table->sort.io_cache= (IO_CACHE*) my_malloc(sizeof(IO_CACHE), - MYF(MY_THREAD_SPECIFIC|MY_ZEROFILL)); - if (!outfile || - (! my_b_inited(outfile) && - open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER, - MYF(MY_WME)))) + /* Open cached file for table records if it isn't open */ + if (! my_b_inited(outfile) && + open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER, + MYF(MY_WME))) return 1; - Sort_param sort_param; bzero((char*) &sort_param,sizeof(sort_param)); sort_param.max_rows= elements; sort_param.sort_form= table; @@ -744,44 +745,49 @@ err: /* - Modify the TABLE element so that when one calls init_records() - the rows will be read in priority order. + Allocate memory that can be used with init_records() so that + rows will be read in priority order. */ bool Unique::get(TABLE *table) { bool rc= 1; uchar *sort_buffer= NULL; - table->sort.found_records= elements+tree.elements_in_tree; + sort.return_rows= elements+tree.elements_in_tree; + DBUG_ENTER("Unique::get"); if (my_b_tell(&file) == 0) { /* Whole tree is in memory; Don't use disk if you don't need to */ - if ((record_pointers=table->sort.record_pointers= (uchar*) + if ((sort.record_pointers= (uchar*) my_malloc(size * tree.elements_in_tree, MYF(MY_THREAD_SPECIFIC)))) { + uchar *save_record_pointers= sort.record_pointers; tree_walk_action action= min_dupl_count ? (tree_walk_action) unique_intersect_write_to_ptrs : (tree_walk_action) unique_write_to_ptrs; filtered_out_elems= 0; (void) tree_walk(&tree, action, this, left_root_right); - table->sort.found_records-= filtered_out_elems; - return 0; + /* Restore record_pointers that was changed in by 'action' above */ + sort.record_pointers= save_record_pointers; + sort.return_rows-= filtered_out_elems; + DBUG_RETURN(0); } } /* Not enough memory; Save the result to file && free memory used by tree */ if (flush()) - return 1; + DBUG_RETURN(1); size_t buff_sz= (max_in_memory_size / full_size + 1) * full_size; - if (!(sort_buffer= (uchar*) my_malloc(buff_sz, MYF(MY_THREAD_SPECIFIC|MY_WME)))) - return 1; + if (!(sort_buffer= (uchar*) my_malloc(buff_sz, + MYF(MY_THREAD_SPECIFIC|MY_WME)))) + DBUG_RETURN(1); if (merge(table, sort_buffer, FALSE)) - goto err; + goto err; rc= 0; err: my_free(sort_buffer); - return rc; + DBUG_RETURN(rc); } diff --git a/sql/uniques.h b/sql/uniques.h new file mode 100644 index 00000000000..0210e879788 --- /dev/null +++ b/sql/uniques.h @@ -0,0 +1,100 @@ +/* Copyright (c) 2016 MariaDB corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef UNIQUE_INCLUDED +#define UNIQUE_INCLUDED + +#include "filesort.h" + +/* + Unique -- class for unique (removing of duplicates). + Puts all values to the TREE. If the tree becomes too big, + it's dumped to the file. User can request sorted values, or + just iterate through them. In the last case tree merging is performed in + memory simultaneously with iteration, so it should be ~2-3x faster. + */ + +class Unique :public Sql_alloc +{ + DYNAMIC_ARRAY file_ptrs; + ulong max_elements; + ulonglong max_in_memory_size; + IO_CACHE file; + TREE tree; + ulong filtered_out_elems; + uint size; + uint full_size; + uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ + bool with_counters; + + bool merge(TABLE *table, uchar *buff, bool without_last_merge); + bool flush(); + +public: + ulong elements; + SORT_INFO sort; + Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, + uint size_arg, ulonglong max_in_memory_size_arg, + uint min_dupl_count_arg= 0); + ~Unique(); + ulong elements_in_tree() { return tree.elements_in_tree; } + inline bool unique_add(void *ptr) + { + DBUG_ENTER("unique_add"); + DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements)); + if (!(tree.flag & TREE_ONLY_DUPS) && + tree.elements_in_tree >= max_elements && flush()) + DBUG_RETURN(1); + DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); + } + + bool is_in_memory() { return (my_b_tell(&file) == 0); } + void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; } + + bool get(TABLE *table); + + /* Cost of searching for an element in the tree */ + inline static double get_search_cost(ulonglong tree_elems, uint compare_factor) + { + return log((double) tree_elems) / (compare_factor * M_LN2); + } + + static double get_use_cost(uint *buffer, size_t nkeys, uint key_size, + ulonglong max_in_memory_size, uint compare_factor, + bool intersect_fl, bool *in_memory); + inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size, + ulonglong max_in_memory_size) + { + register ulonglong max_elems_in_tree= + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size); + return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree)); + } + + void reset(); + bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); + + uint get_size() const { return size; } + ulonglong get_max_in_memory_size() const { return max_in_memory_size; } + + friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); + friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); + + friend int unique_write_to_file_with_count(uchar* key, element_count count, + Unique *unique); + friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, + Unique *unique); +}; + +#endif /* UNIQUE_INCLUDED */ diff --git a/storage/spider/spd_trx.cc b/storage/spider/spd_trx.cc index d7127fa0084..b376265c1e7 100644 --- a/storage/spider/spd_trx.cc +++ b/storage/spider/spd_trx.cc @@ -2683,7 +2683,8 @@ int spider_initinal_xa_recover( FALSE, open_tables_backup, TRUE, &error_num)) ) goto error_open_table; - init_read_record(read_record, thd, table_xa, NULL, TRUE, FALSE, FALSE); + init_read_record(read_record, thd, table_xa, NULL, NULL, TRUE, FALSE, + FALSE); } SPD_INIT_ALLOC_ROOT(&mem_root, 4096, 0, MYF(MY_WME)); while ((!(read_record->read_record(read_record))) 
&& cnt < (int) len)
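A usage note on the relocated Unique class (uniques.h above): its sort result now lives in the public Unique::sort member rather than in TABLE->sort, so a caller that collected row ids in a Unique reads them back by handing &unique->sort to init_read_record(), as multi_delete::do_table_deletes() does in the sql_delete.cc hunk. A hedged sketch, assuming a hypothetical helper delete_collected_rows() and simplified error reporting:

#include "uniques.h"    // Unique (now carries its own SORT_INFO)
#include "records.h"    // init_read_record(), end_read_record()

/* Hypothetical helper: delete the rows whose ids were collected in 'rowids' */
static int delete_collected_rows(THD *thd, TABLE *table, Unique *rowids)
{
  READ_RECORD info;

  if (rowids->get(table))        /* merge tree/temp file into rowids->sort */
    return 1;
  /* The SORT_INFO travels with the Unique, not with the TABLE */
  if (init_read_record(&info, thd, table, NULL, &rowids->sort, 0, 1, FALSE))
    return 1;
  while (!info.read_record(&info))
  {
    if (table->file->ha_delete_row(table->record[0]))
      break;                     /* handler error; real code reports it */
  }
  end_read_record(&info);
  return 0;
}

This is also why do_table_deletes() now takes a per-call SORT_INFO argument: each Unique in multi_delete's tempfiles array carries its own sort state instead of sharing a single slot on the TABLE.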