MDEV-415: Back-port of the WL task #1393 from the mysql-5.6 code line.

The task adds a more efficient handling of the queries with ORDER BY order LIMIT n, such that n is small enough and no indexes are used for order.
author: Igor Babaev <igor@askmonty.org> 2012-09-01 14:21:59 -0700
committer: Igor Babaev <igor@askmonty.org> 2012-09-01 14:21:59 -0700
commit: a6b88f1431238152643e41979ce10b9bbdac2a82 (patch)
tree: 42482ef66f3c59b255d299fc58ace8c1b0c518ea /sql
parent: 5a86a61219826aadf8d08cbc447fe438f2bf50c3 (diff)
download: mariadb-git-a6b88f1431238152643e41979ce10b9bbdac2a82.tar.gz
14 files changed, 1098 insertions, 216 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index ecf91fcf043..1607bfadb0e 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -39,6 +39,7 @@ ENDIF()
 SET (SQL_SOURCE
               ../sql-common/client.c derror.cc des_key_file.cc
                discover.cc ../libmysql/errmsg.c field.cc  field_conv.cc 
+               filesort_utils.cc
                filesort.cc gstream.cc sha2.cc
                signal_handler.cc
                handler.cc hash_filo.h sql_plugin_services.h
diff --git a/sql/bounded_queue.h b/sql/bounded_queue.h
new file mode 100644
index 00000000000..2d4e6cff96d
--- /dev/null
+++ b/sql/bounded_queue.h
@@ -0,0 +1,195 @@
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifndef BOUNDED_QUEUE_INCLUDED
+#define BOUNDED_QUEUE_INCLUDED
+
+#include <string.h>
+#include "my_global.h"
+#include "my_base.h"
+#include "my_sys.h"
+#include "queues.h"
+
+class Sort_param;
+
+/**
+  A priority queue with a fixed, limited size.
+
+  This is a wrapper on top of QUEUE and the queue_xxx() functions.
+  It keeps the top-N elements which are inserted.
+
+  Elements of type Element_type are pushed into the queue.
+  For each element, we call a user-supplied keymaker_function,
+  to generate a key of type Key_type for the element.
+  Instances of Key_type are compared with the user-supplied compare_function.
+
+  The underlying QUEUE implementation needs one extra element for replacing
+  the lowest/highest element when pushing into a full queue.
+ */
+template<typename Element_type, typename Key_type>
+class Bounded_queue
+{
+public:
+  Bounded_queue()
+  {
+    memset(&m_queue, 0, sizeof(m_queue));
+  }
+
+  ~Bounded_queue()
+  {
+    delete_queue(&m_queue);
+  }
+
+  /**
+     Function for making sort-key from input data.
+     @param param Sort parameters.
+     @param to    Where to put the key.
+     @param from  The input data.
+  */
+  typedef void (*keymaker_function)(Sort_param *param,
+                                    Key_type *to,
+                                    Element_type *from);
+
+  /**
+     Function for comparing two keys.
+     @param  n Pointer to number of bytes to compare.
+     @param  a First key.
+     @param  b Second key.
+     @retval -1, 0, or 1 depending on whether the left argument is 
+             less than, equal to, or greater than the right argument.
+   */
+  typedef int (*compare_function)(size_t *n, Key_type **a, Key_type **b);
+
+  /**
+    Initialize the queue.
+
+    @param max_elements   The size of the queue.
+    @param max_at_top     Set to true if you want biggest element on top.
+           false: We keep the n largest elements.
+                  pop() will return the smallest key in the result set.
+           true:  We keep the n smallest elements.
+                  pop() will return the largest key in the result set.
+    @param compare        Compare function for elements, takes 3 arguments.
+                          If NULL, we use get_ptr_compare(compare_length).
+    @param compare_length Length of the data (i.e. the keys) used for sorting.
+    @param keymaker       Function which generates keys for elements.
+    @param sort_param     Sort parameters.
+    @param sort_keys      Array of pointers to keys to sort.
+
+    @retval 0 OK, 1 Could not allocate memory.
+
+    We do *not* take ownership of any of the input pointer arguments.
+   */
+  int init(ha_rows max_elements, bool max_at_top,
+           compare_function compare, size_t compare_length,
+           keymaker_function keymaker, Sort_param *sort_param,
+           Key_type **sort_keys);
+
+  /**
+    Pushes an element on the queue.
+    If the queue is already full, we discard one element.
+    Calls keymaker_function to generate a key for the element.
+
+    @param element        The element to be pushed.
+   */
+  void push(Element_type *element);
+
+  /**
+    Removes the top element from the queue.
+
+    @retval Pointer to the (key of the) removed element.
+
+    @note This function is for unit testing, where we push elements into to the
+          queue, and test that the appropriate keys are retained.
+          Interleaving of push() and pop() operations has not been tested.
+   */
+  Key_type **pop()
+  {
+    // Don't return the extra element to the client code.
+    if (queue_is_full((&m_queue)))
+      queue_remove(&m_queue, 0);
+    DBUG_ASSERT(m_queue.elements > 0);
+    if (m_queue.elements == 0)
+      return NULL;
+    return reinterpret_cast<Key_type**>(queue_remove(&m_queue, 0));
+  }
+
+  /**
+    The number of elements in the queue.
+   */
+  uint num_elements() const { return m_queue.elements; }
+
+  /**
+    Is the queue initialized?
+   */
+  bool is_initialized() const { return m_queue.max_elements > 0; }
+
+private:
+  Key_type         **m_sort_keys;
+  size_t             m_compare_length;
+  keymaker_function  m_keymaker;
+  Sort_param        *m_sort_param;
+  st_queue           m_queue;
+};
+
+
+template<typename Element_type, typename Key_type>
+int Bounded_queue<Element_type, Key_type>::init(ha_rows max_elements,
+                                                bool max_at_top,
+                                                compare_function compare,
+                                                size_t compare_length,
+                                                keymaker_function keymaker,
+                                                Sort_param *sort_param,
+                                                Key_type **sort_keys)
+{
+  DBUG_ASSERT(sort_keys != NULL);
+
+  m_sort_keys=      sort_keys;
+  m_compare_length= compare_length;
+  m_keymaker=       keymaker;
+  m_sort_param=     sort_param;
+  // init_queue() takes an uint, and also does (max_elements + 1)
+  if (max_elements >= (UINT_MAX - 1))
+    return 1;
+  if (compare == NULL)
+    compare=
+      reinterpret_cast<compare_function>(get_ptr_compare(compare_length));
+  // We allocate space for one extra element, for replace when queue is full.
+  return init_queue(&m_queue, (uint) max_elements + 1,
+                    0, max_at_top,
+                    reinterpret_cast<queue_compare>(compare),
+                    &m_compare_length, 0, 0);
+}
+
+
+template<typename Element_type, typename Key_type>
+void Bounded_queue<Element_type, Key_type>::push(Element_type *element)
+{
+  DBUG_ASSERT(is_initialized());
+  if (queue_is_full((&m_queue)))
+  {
+    // Replace top element with new key, and re-order the queue.
+    Key_type **pq_top= reinterpret_cast<Key_type **>(queue_top(&m_queue));
+    (*m_keymaker)(m_sort_param, *pq_top, element);
+    queue_replace_top(&m_queue);
+  } else {
+    // Insert new key into the queue.
+    (*m_keymaker)(m_sort_param, m_sort_keys[m_queue.elements], element);
+    queue_insert(&m_queue,
+                 reinterpret_cast<uchar*>(&m_sort_keys[m_queue.elements]));
+  }
+}
+
+#endif  // BOUNDED_QUEUE_INCLUDED
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 03379f2738a..407412e906a 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -34,6 +34,9 @@
 #include "sql_base.h"                           // update_virtual_fields
 #include "sql_test.h"                           // TEST_filesort
 #include "opt_range.h"                          // SQL_SELECT
+#include "bounded_queue.h"
+#include "filesort_utils.h"
+#include "sql_select.h"
 #include "log_slow.h"
 #include "debug_sync.h"
 
@@ -42,26 +45,72 @@
 if (my_b_write((file),(uchar*) (from),param->ref_length)) \
   DBUG_RETURN(1);
 
+#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
+template class Bounded_queue<uchar, uchar>;
+#endif
+
 	/* functions defined in this file */
 
 static uchar *read_buffpek_from_file(IO_CACHE *buffer_file, uint count,
                                      uchar *buf);
-static ha_rows find_all_keys(SORTPARAM *param,SQL_SELECT *select,
-			     uchar * *sort_keys, uchar *sort_keys_buf,
-                             IO_CACHE *buffer_file, IO_CACHE *tempfile);
-static int write_keys(SORTPARAM *param,uchar * *sort_keys,
-		      uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
-static void make_sortkey(SORTPARAM *param,uchar *to, uchar *ref_pos);
-static void register_used_fields(SORTPARAM *param);
-static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count, 
-                       FILESORT_INFO *table_sort);
+static ha_rows find_all_keys(Sort_param *param,SQL_SELECT *select,
+                             Filesort_info *fs_info,
+                             IO_CACHE *buffer_file,
+                             IO_CACHE *tempfile,
+                             Bounded_queue<uchar, uchar> *pq,
+                             ha_rows *found_rows);
+static int write_keys(Sort_param *param, Filesort_info *fs_info,
+                      uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
+static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos);
+static void register_used_fields(Sort_param *param);
+static bool save_index(Sort_param *param, uint count,
+                       Filesort_info *table_sort);
+static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos);
 static uint suffix_length(ulong string_length);
 static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
 		       bool *multi_byte_charset);
-static SORT_ADDON_FIELD *get_addon_fields(THD *thd, Field **ptabfield,
+static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data,
+                                          Field **ptabfield,
                                           uint sortlength, uint *plength);
 static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
                                 uchar *buff, uchar *buff_end);
+static bool check_if_pq_applicable(Sort_param *param, Filesort_info *info,
+                                   TABLE *table,
+                                   ha_rows records, ulong memory_available);
+
+
+void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
+                                   ulong max_length_for_sort_data,
+                                   ha_rows maxrows, bool sort_positions)
+{
+  sort_length= sortlen;
+  ref_length= table->file->ref_length;
+  if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
+      !table->fulltext_searched && !sort_positions)
+  {
+    /* 
+      Get the descriptors of all fields whose values are appended 
+      to sorted fields and get its total length in addon_length.
+    */
+    addon_field= get_addon_fields(max_length_for_sort_data,
+                                  table->field, sort_length, &addon_length);
+  }
+  if (addon_field)
+    res_length= addon_length;
+  else
+  {
+    res_length= ref_length;
+    /* 
+      The reference to the record is considered 
+      as an additional sorted field
+    */
+    sort_length+= ref_length;
+  }
+  rec_length= sort_length + addon_length;
+  max_rows= maxrows;
+}
+
+
 /**
   Sort a table.
   Creates a set of pointers that can be used to read the rows
@@ -74,15 +123,17 @@ static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
   The result set is stored in table->io_cache or
   table->record_pointers.
 
-  @param thd           Current thread
-  @param table		Table to sort
-  @param sortorder	How to sort the table
-  @param s_length	Number of elements in sortorder
-  @param select		condition to apply to the rows
-  @param max_rows	Return only this many rows
-  @param sort_positions	Set to 1 if we want to force sorting by position
-			(Needed by UPDATE/INSERT or ALTER TABLE)
-  @param examined_rows	Store number of examined rows here
+  @param      thd            Current thread
+  @param      table          Table to sort
+  @param      sortorder      How to sort the table
+  @param      s_length       Number of elements in sortorder
+  @param      select         Condition to apply to the rows
+  @param      max_rows       Return only this many rows
+  @param      sort_positions Set to TRUE if we want to force sorting by position
+                             (Needed by UPDATE/INSERT or ALTER TABLE or
+                              when rowids are required by executor)
+  @param[out] examined_rows  Store number of examined rows here
+  @param[out] found_rows     Store the number of found rows here
 
   @note
     If we sort by position (like if sort_positions is 1) filesort() will
@@ -92,30 +143,30 @@ static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
     HA_POS_ERROR	Error
   @retval
     \#			Number of rows
-  @retval
-    examined_rows	will be set to number of examined rows
 */
 
 ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
 		 SQL_SELECT *select, ha_rows max_rows,
-                 bool sort_positions, ha_rows *examined_rows)
+                 bool sort_positions,
+                 ha_rows *examined_rows,
+                 ha_rows *found_rows)
 {
   int error;
   ulong memory_available= thd->variables.sortbuff_size;
-  ulong min_sort_memory;
   uint maxbuffer;
   BUFFPEK *buffpek;
   ha_rows num_rows= HA_POS_ERROR;
-  uchar **sort_keys= 0;
   IO_CACHE tempfile, buffpek_pointers, *outfile; 
-  SORTPARAM param;
+  Sort_param param;
   bool multi_byte_charset;
+  Bounded_queue<uchar, uchar> pq;
+
   DBUG_ENTER("filesort");
   DBUG_EXECUTE("info",TEST_filesort(sortorder,s_length););
 #ifdef SKIP_DBUG_IN_FILESORT
   DBUG_PUSH("");		/* No DBUG here */
 #endif
-  FILESORT_INFO table_sort;
+  Filesort_info table_sort= table->sort;
   TABLE_LIST *tab= table->pos_in_table_list;
   Item_subselect *subselect= tab ? tab->containing_subselect() : 0;
 
@@ -133,7 +184,6 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
     QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end 
     when index_merge select has finished with it.
   */
-  memcpy(&table_sort, &table->sort, sizeof(FILESORT_INFO));
   table->sort.io_cache= NULL;
   
   outfile= table_sort.io_cache;
@@ -141,43 +191,21 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   my_b_clear(&buffpek_pointers);
   buffpek=0;
   error= 1;
-  bzero((char*) &param,sizeof(param));
-  param.sort_length= sortlength(thd, sortorder, s_length, &multi_byte_charset);
-  param.ref_length= table->file->ref_length;
-  if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
-      !table->fulltext_searched && !sort_positions)
-  {
-    /* 
-      Get the descriptors of all fields whose values are appended 
-      to sorted fields and get its total length in param.spack_length.
-    */
-    param.addon_field= get_addon_fields(thd, table->field, 
-                                        param.sort_length,
-                                        &param.addon_length);
-  }
+
+  param.init_for_filesort(sortlength(thd, sortorder, s_length,
+                                     &multi_byte_charset),
+                          table,
+                          thd->variables.max_length_for_sort_data,
+                          max_rows, sort_positions);
 
   table_sort.addon_buf= 0;
   table_sort.addon_length= param.addon_length;
   table_sort.addon_field= param.addon_field;
   table_sort.unpack= unpack_addon_fields;
-  if (param.addon_field)
-  {
-    param.res_length= param.addon_length;
-    if (!(table_sort.addon_buf= (uchar *) my_malloc(param.addon_length,
-                                                    MYF(MY_WME))))
-      goto err;
-  }
-  else
-  {
-    param.res_length= param.ref_length;
-    /* 
-      The reference to the record is considered 
-      as an additional sorted field
-    */
-    param.sort_length+= param.ref_length;
-  }
-  param.rec_length= param.sort_length+param.addon_length;
-  param.max_rows= max_rows;
+  if (param.addon_field &&
+      !(table_sort.addon_buf=
+        (uchar *) my_malloc(param.addon_length, MYF(MY_WME))))
+    goto err;
 
   if (select && select->quick)
     status_var_increment(thd->status_var.filesort_range_count);
@@ -192,20 +220,54 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
       !(param.tmp_buffer= (char*) my_malloc(param.sort_length,MYF(MY_WME))))
     goto err;
 
-  min_sort_memory= max(MIN_SORT_MEMORY, param.sort_length * MERGEBUFF2);
-  if (!table_sort.sort_keys)
+  if (check_if_pq_applicable(&param, &table_sort,
+                             table, num_rows, memory_available))
+  {
+    DBUG_PRINT("info", ("filesort PQ is applicable"));
+    const size_t compare_length= param.sort_length;
+    if (pq.init(param.max_rows,
+                true,                           // max_at_top
+                NULL,                           // compare_function
+                compare_length,
+                &make_sortkey, &param, table_sort.get_sort_keys()))
+    {
+      /*
+       If we fail to init pq, we have to give up:
+       out of memory means my_malloc() will call my_error().
+      */
+      DBUG_PRINT("info", ("failed to allocate PQ"));
+      table_sort.free_sort_buffer();
+      DBUG_ASSERT(thd->is_error());
+      goto err;
+    }
+    // For PQ queries (with limit) we initialize all pointers.
+    table_sort.init_record_pointers();
+  }
+  else
   {
+    DBUG_PRINT("info", ("filesort PQ is not applicable"));
+
+    const ulong min_sort_memory=
+      max(MIN_SORT_MEMORY, param.sort_length*MERGEBUFF2);
     while (memory_available >= min_sort_memory)
     {
       ulong keys= memory_available / (param.rec_length + sizeof(char*));
-      table_sort.keys= (uint) min(num_rows, keys);
-
-      DBUG_EXECUTE_IF("make_sort_keys_alloc_fail",
-                      DBUG_SET("+d,simulate_out_of_memory"););
-
-      if ((table_sort.sort_keys=
-           (uchar**) my_malloc(table_sort.keys*(param.rec_length+sizeof(char*)),
-                               MYF(0))))
+      param.max_keys_per_buffer= (uint) min(num_rows, keys);
+      if (table_sort.get_sort_keys())
+      {
+        // If we have already allocated a buffer, it better have same size!
+        if (!table_sort.check_sort_buffer_properties(param.max_keys_per_buffer,
+                                                     param.rec_length))
+        {
+          /*
+            table->sort will still have a pointer to the same buffer,
+            but that will be overwritten by the assignment below.
+          */
+          table_sort.free_sort_buffer();
+        }
+      }
+      table_sort.alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length);
+      if (table_sort.get_sort_keys())
         break;
       ulong old_memory_available= memory_available;
       memory_available= memory_available/4*3;
@@ -213,40 +275,37 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
           old_memory_available > min_sort_memory)
         memory_available= min_sort_memory;
     }
+    if (memory_available < min_sort_memory)
+    {
+      my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR));
+      goto err;
+    }
   }
 
-  sort_keys= table_sort.sort_keys;
-  param.keys= table_sort.keys;
-  if (memory_available < min_sort_memory)
-  {
-    my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR));
-    goto err;
-  }
   if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
-		       DISK_BUFFER_SIZE, MYF(ME_ERROR | MY_WME)))
+		       DISK_BUFFER_SIZE, MYF(MY_WME)))
     goto err;
 
   param.sort_form= table;
   param.end=(param.local_sortorder=sortorder)+s_length;
-  num_rows= find_all_keys(&param,
-                          select,
-                          sort_keys,
-                          (uchar *)(sort_keys+param.keys),
+  num_rows= find_all_keys(&param, select,
+                          &table_sort,
                           &buffpek_pointers,
-                          &tempfile);
+                          &tempfile, 
+                          pq.is_initialized() ? &pq : NULL,
+                          found_rows);
   if (num_rows == HA_POS_ERROR)
     goto err;
+
   maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek));
 
   if (maxbuffer == 0)			// The whole set is in memory
   {
-    if (save_index(&param,sort_keys,(uint) num_rows, &table_sort))
+    if (save_index(&param, (uint) num_rows, &table_sort))
       goto err;
   }
   else
   {
-    thd->query_plan_flags|= QPLAN_FILESORT_DISK;
-
     /* filesort cannot handle zero-length records during merge. */
     DBUG_ASSERT(param.sort_length != 0);
 
@@ -265,7 +324,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
 	/* Open cached file if it isn't open */
     if (! my_b_inited(outfile) &&
 	open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
-			  MYF(ME_ERROR | MY_WME)))
+			  MYF(MY_WME)))
       goto err;
     if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
       goto err;
@@ -274,18 +333,20 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
       Use also the space previously used by string pointers in sort_buffer
       for temporary key storage.
     */
-    param.keys=((param.keys *
-                 (param.rec_length+sizeof(char*))) /
-		param.rec_length - 1);
+    param.max_keys_per_buffer=((param.max_keys_per_buffer *
+                                (param.rec_length + sizeof(char*))) /
+                               param.rec_length - 1);
     maxbuffer--;				// Offset from 0
-    if (merge_many_buff(&param,(uchar*) sort_keys,buffpek,&maxbuffer,
+    if (merge_many_buff(&param,
+                        (uchar*) table_sort.get_sort_keys(),
+                        buffpek,&maxbuffer,
 			&tempfile))
       goto err;
     if (flush_io_cache(&tempfile) ||
 	reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
       goto err;
     if (merge_index(&param,
-                    (uchar*) sort_keys,
+                    (uchar*) table_sort.get_sort_keys(),
                     buffpek,
                     maxbuffer,
                     &tempfile,
@@ -300,12 +361,11 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
   }
   error= 0;
 
- err:
+  err:
   my_free(param.tmp_buffer);
   if (!subselect || !subselect->is_uncacheable())
   {
-    my_free(sort_keys);
-    table_sort.sort_keys= 0;
+    table_sort.free_sort_buffer();
     my_free(buffpek);
     table_sort.buffpek= 0;
     table_sort.buffpek_len= 0;
@@ -336,7 +396,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
                     thd->killed == ABORT_QUERY ? "" : thd->stmt_da->message());
 
     if (global_system_variables.log_warnings > 1)
-    {
+    { 
       sql_print_warning("%s, host: %s, user: %s, thread: %lu, query: %-.4096s",
                         ER_THD(thd, ER_FILSORT_ABORT),
                         thd->security_ctx->host_or_ip,
@@ -352,8 +412,13 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
 #ifdef SKIP_DBUG_IN_FILESORT
   DBUG_POP();			/* Ok to DBUG */
 #endif
-  memcpy(&table->sort, &table_sort, sizeof(FILESORT_INFO));
-  DBUG_PRINT("exit",("num_rows: %ld", (long) num_rows));
+
+  // Assign the copy back!
+  table->sort= table_sort;
+
+  DBUG_PRINT("exit",
+             ("num_rows: %ld examined_rows: %ld found_rows: %ld",
+              (long) num_rows, (long) *examined_rows, (long) *found_rows));
   MYSQL_FILESORT_DONE(error, num_rows);
   DBUG_RETURN(error ? HA_POS_ERROR : num_rows);
 } /* filesort */
@@ -361,13 +426,13 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
 
 void filesort_free_buffers(TABLE *table, bool full)
 {
+  DBUG_ENTER("filesort_free_buffers");
   my_free(table->sort.record_pointers);
   table->sort.record_pointers= NULL;
 
   if (full)
   {
-    my_free(table->sort.sort_keys);
-    table->sort.sort_keys= NULL;
+    table->sort.free_sort_buffer();
     my_free(table->sort.buffpek);
     table->sort.buffpek= NULL;
     table->sort.buffpek_len= 0;
@@ -377,6 +442,7 @@ void filesort_free_buffers(TABLE *table, bool full)
   my_free(table->sort.addon_field);
   table->sort.addon_buf= NULL;
   table->sort.addon_field= NULL;
+  DBUG_VOID_RETURN;
 }
 
 
@@ -455,8 +521,10 @@ static void dbug_print_record(TABLE *table, bool print_rowid)
 }
 #endif 
 
+
 /**
-  Search after sort_keys and write them into tempfile.
+  Search after sort_keys, and write them into tempfile
+  (if we run out of space in the sort_keys buffer).
   All produced sequences are guaranteed to be non-empty.
 
   @param param             Sorting parameter
@@ -465,19 +533,28 @@ static void dbug_print_record(TABLE *table, bool print_rowid)
   @param buffpek_pointers  File to write BUFFPEKs describing sorted segments
                            in tempfile.
   @param tempfile          File to write sorted sequences of sortkeys to.
+  @param pq                If !NULL, use it for keeping top N elements
+  @param [out] found_rows  The number of FOUND_ROWS().
+                           For a query with LIMIT, this value will typically
+                           be larger than the function return value.
 
   @note
     Basic idea:
     @verbatim
      while (get_next_sortkey())
      {
-       if (no free space in sort_keys buffers)
+       if (using priority queue)
+         push sort key into queue
+       else
        {
-         sort sort_keys buffer;
-         dump sorted sequence to 'tempfile';
-         dump BUFFPEK describing sequence location into 'buffpek_pointers';
+         if (no free space in sort_keys buffers)
+         {
+           sort sort_keys buffer;
+           dump sorted sequence to 'tempfile';
+           dump BUFFPEK describing sequence location into 'buffpek_pointers';
+         }
+         put sort key into 'sort_keys';
        }
-       put sort key into 'sort_keys';
      }
      if (sort_keys has some elements && dumped at least once)
        sort-dump-dump as above;
@@ -491,10 +568,12 @@ static void dbug_print_record(TABLE *table, bool print_rowid)
     HA_POS_ERROR on error.
 */
 
-static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
-			     uchar **sort_keys, uchar *sort_keys_buf,
+static ha_rows find_all_keys(Sort_param *param, SQL_SELECT *select,
+                             Filesort_info *fs_info,
 			     IO_CACHE *buffpek_pointers,
-			     IO_CACHE *tempfile)
+                             IO_CACHE *tempfile,
+                             Bounded_queue<uchar, uchar> *pq,
+                             ha_rows *found_rows)
 {
   int error,flag,quick_select;
   uint idx,indexpos,ref_length;
@@ -505,7 +584,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   volatile killed_state *killed= &thd->killed;
   handler *file;
   MY_BITMAP *save_read_set, *save_write_set, *save_vcol_set;
-  uchar *next_sort_key= sort_keys_buf;
+  
   DBUG_ENTER("find_all_keys");
   DBUG_PRINT("info",("using: %s",
                      (select ? select->quick ? "ranges" : "where":
@@ -519,6 +598,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
   ref_pos= ref_buff;
   quick_select=select && select->quick;
   record=0;
+  *found_rows= 0;
   flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
   if (flag)
     ref_pos= &file->ref[0];
@@ -532,7 +612,6 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 		    current_thd->variables.read_buff_size);
   }
 
-
   /* Remember original bitmaps */
   save_read_set=  sort_form->read_set;
   save_write_set= sort_form->write_set;
@@ -631,18 +710,23 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 
     if (write_record)
     {
-      if (idx == param->keys)
+       ++(*found_rows);
+      if (pq)
       {
-	if (write_keys(param, sort_keys,
-                       idx, buffpek_pointers, tempfile))
-	  DBUG_RETURN(HA_POS_ERROR);
-	idx= 0;
-        next_sort_key= sort_keys_buf;
-	indexpos++;
+        pq->push(ref_pos);
+        idx= pq->num_elements();
+      }
+      else
+      {
+        if (idx == param->max_keys_per_buffer)
+        {
+          if (write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
+             DBUG_RETURN(HA_POS_ERROR);
+	  idx= 0;
+	  indexpos++;
+        }
+        make_sortkey(param, fs_info->get_record_buffer(idx++), ref_pos);
       }
-      sort_keys[idx++]= next_sort_key;
-      make_sortkey(param, next_sort_key, ref_pos);
-      next_sort_key+= param->rec_length;
     }
     else
       file->unlock_row();
@@ -671,12 +755,12 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
     DBUG_RETURN(HA_POS_ERROR);			/* purecov: inspected */
   }
   if (indexpos && idx &&
-      write_keys(param, sort_keys,
-                 idx, buffpek_pointers, tempfile))
+      write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
     DBUG_RETURN(HA_POS_ERROR);			/* purecov: inspected */
   const ha_rows retval=
     my_b_inited(tempfile) ?
     (ha_rows) (my_b_tell(tempfile)/param->rec_length) : idx;
+  DBUG_PRINT("info", ("find_all_keys return %u", (uint) retval));
   DBUG_RETURN(retval);
 } /* find_all_keys */
 
@@ -704,21 +788,19 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
 */
 
 static int
-write_keys(SORTPARAM *param, register uchar **sort_keys, uint count,
+write_keys(Sort_param *param,  Filesort_info *fs_info, uint count,
            IO_CACHE *buffpek_pointers, IO_CACHE *tempfile)
 {
-  size_t sort_length, rec_length;
+  size_t rec_length;
   uchar **end;
   BUFFPEK buffpek;
   DBUG_ENTER("write_keys");
 
-  sort_length= param->sort_length;
   rec_length= param->rec_length;
-#ifdef MC68000
-  quicksort(sort_keys,count,sort_length);
-#else
-  my_string_ptr_sort((uchar*) sort_keys, (uint) count, sort_length);
-#endif
+  uchar **sort_keys= fs_info->get_sort_keys();
+
+  fs_info->sort_buffer(param, count);
+
   if (!my_b_inited(tempfile) &&
       open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
                        MYF(MY_WME)))
@@ -767,8 +849,8 @@ static inline void store_length(uchar *to, uint length, uint pack_length)
 
 /** Make a sort-key from record. */
 
-static void make_sortkey(register SORTPARAM *param,
-			 register uchar *to, uchar *ref_pos)
+static void make_sortkey(register Sort_param *param,
+                         register uchar *to, uchar *ref_pos)
 {
   reg3 Field *field;
   reg1 SORT_FIELD *sort_field;
@@ -786,9 +868,9 @@ static void make_sortkey(register SORTPARAM *param,
 	if (field->is_null())
 	{
 	  if (sort_field->reverse)
-	    bfill(to,sort_field->length+1,(char) 255);
+	    memset(to, 255, sort_field->length+1);
 	  else
-	    bzero((char*) to,sort_field->length+1);
+	    memset(to, 0, sort_field->length+1);
 	  to+= sort_field->length+1;
 	  continue;
 	}
@@ -804,7 +886,7 @@ static void make_sortkey(register SORTPARAM *param,
       switch (sort_field->result_type) {
       case STRING_RESULT:
       {
-        CHARSET_INFO *cs=item->collation.collation;
+        const CHARSET_INFO *cs=item->collation.collation;
         char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
         int diff;
         uint sort_field_length;
@@ -817,7 +899,7 @@ static void make_sortkey(register SORTPARAM *param,
         if (!res)
         {
           if (maybe_null)
-            bzero((char*) to-1,sort_field->length+1);
+            memset(to-1, 0, sort_field->length+1);
           else
           {
             /* purecov: begin deadcode */
@@ -829,7 +911,7 @@ static void make_sortkey(register SORTPARAM *param,
             DBUG_ASSERT(0);
             DBUG_PRINT("warning",
                        ("Got null on something that shouldn't be null"));
-            bzero((char*) to,sort_field->length);	// Avoid crash
+            memset(to, 0, sort_field->length);	// Avoid crash
             /* purecov: end */
           }
           break;
@@ -885,12 +967,19 @@ static void make_sortkey(register SORTPARAM *param,
           }
           if (maybe_null)
           {
+	    *to++=1;				/* purecov: inspected */
             if (item->null_value)
             {
-              bzero((char*) to++, sort_field->length+1);
+              if (maybe_null)
+                memset(to-1, 0, sort_field->length+1);
+              else
+              {
+                DBUG_PRINT("warning",
+                           ("Got null on something that shouldn't be null"));
+                memset(to, 0, sort_field->length);
+              }
               break;
             }
-	    *to++=1;				/* purecov: inspected */
           }
 	  to[7]= (uchar) value;
 	  to[6]= (uchar) (value >> 8);
@@ -912,7 +1001,8 @@ static void make_sortkey(register SORTPARAM *param,
           {
             if (item->null_value)
             { 
-              bzero((char*) to++, sort_field->length+1);
+              memset(to, 0, sort_field->length+1);
+              to++;
               break;
             }
             *to++=1;
@@ -929,7 +1019,7 @@ static void make_sortkey(register SORTPARAM *param,
           {
             if (item->null_value)
             {
-              bzero((char*) to,sort_field->length+1);
+              memset(to, 0, sort_field->length+1);
               to++;
               break;
             }
@@ -971,7 +1061,7 @@ static void make_sortkey(register SORTPARAM *param,
     SORT_ADDON_FIELD *addonf= param->addon_field;
     uchar *nulls= to;
     DBUG_ASSERT(addonf != 0);
-    bzero((char *) nulls, addonf->offset);
+    memset(nulls, 0, addonf->offset);
     to+= addonf->offset;
     for ( ; (field= addonf->field) ; addonf++)
     {
@@ -1010,7 +1100,7 @@ static void make_sortkey(register SORTPARAM *param,
   Register fields used by sorting in the sorted table's read set
 */
 
-static void register_used_fields(SORTPARAM *param)
+static void register_used_fields(Sort_param *param)
 {
   reg1 SORT_FIELD *sort_field;
   TABLE *table=param->sort_form;
@@ -1055,21 +1145,19 @@ static void register_used_fields(SORTPARAM *param)
 }
 
 
-static bool save_index(SORTPARAM *param, uchar **sort_keys, uint count, 
-                       FILESORT_INFO *table_sort)
+static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort)
 {
   uint offset,res_length;
   uchar *to;
   DBUG_ENTER("save_index");
 
-  my_string_ptr_sort((uchar*) sort_keys, (uint) count, param->sort_length);
+  table_sort->sort_buffer(param, count);
   res_length= param->res_length;
   offset= param->rec_length-res_length;
-  if ((ha_rows) count > param->max_rows)
-    count=(uint) param->max_rows;
   if (!(to= table_sort->record_pointers= 
         (uchar*) my_malloc(res_length*count, MYF(MY_WME))))
     DBUG_RETURN(1);                 /* purecov: inspected */
+  uchar **sort_keys= table_sort->get_sort_keys();
   for (uchar **end= sort_keys+count ; sort_keys != end ; sort_keys++)
   {
     memcpy(to, *sort_keys+offset, res_length);
@@ -1079,10 +1167,150 @@ static bool save_index(SORTPARAM *param, uchar **sort_keys, uint count,
 }
 
 
+/**
+  Test whether priority queue is worth using to get top elements of an
+  ordered result set. If it is, then allocates buffer for required amount of
+  records
+
+  @param param            Sort parameters.
+  @param filesort_info    Filesort information.
+  @param table            Table to sort.
+  @param num_rows         Estimate of number of rows in source record set.
+  @param memory_available Memory available for sorting.
+
+  DESCRIPTION
+    Given a query like this:
+      SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
+    This function tests whether a priority queue should be used to keep
+    the result. Necessary conditions are:
+    - estimate that it is actually cheaper than merge-sort
+    - enough memory to store the <max_rows> records.
+
+    If we don't have space for <max_rows> records, but we *do* have
+    space for <max_rows> keys, we may rewrite 'table' to sort with
+    references to records instead of additional data.
+    (again, based on estimates that it will actually be cheaper).
+
+   @retval
+    true  - if it's ok to use PQ
+    false - PQ will be slower than merge-sort, or there is not enough memory.
+*/
+
+bool check_if_pq_applicable(Sort_param *param,
+                            Filesort_info *filesort_info,
+                            TABLE *table, ha_rows num_rows,
+                            ulong memory_available)
+{
+  DBUG_ENTER("check_if_pq_applicable");
+
+  /*
+    How much Priority Queue sort is slower than qsort.
+    Measurements (see unit test) indicate that PQ is roughly 3 times slower.
+  */
+  const double PQ_slowness= 3.0;
+
+  if (param->max_rows == HA_POS_ERROR)
+  {
+    DBUG_PRINT("info", ("No LIMIT"));
+    DBUG_RETURN(NULL);
+  }
+
+  if (param->max_rows + 2 >= UINT_MAX)
+  {
+    DBUG_PRINT("info", ("Too large LIMIT"));
+    DBUG_RETURN(NULL);
+  }
+
+  ulong num_available_keys=
+    memory_available / (param->rec_length + sizeof(char*));
+  // We need 1 extra record in the buffer, when using PQ.
+  param->max_keys_per_buffer= (uint) param->max_rows + 1;
+
+  if (num_rows < num_available_keys)
+  {
+    // The whole source set fits into memory.
+    if (param->max_rows < num_rows/PQ_slowness )
+    {
+      filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
+                                       param->rec_length);
+      DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
+    }
+    else
+    {
+      // PQ will be slower.
+      DBUG_RETURN(false);
+    }
+  }
+
+  // Do we have space for LIMIT rows in memory?
+  if (param->max_keys_per_buffer < num_available_keys)
+  {
+    filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
+                                     param->rec_length);
+    DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
+  }
+
+  // Try to strip off addon fields.
+  if (param->addon_field)
+  {
+    const ulong row_length=
+      param->sort_length + param->ref_length + sizeof(char*);
+    num_available_keys= memory_available / row_length;
+
+    // Can we fit all the keys in memory?
+    if (param->max_keys_per_buffer < num_available_keys)
+    {
+      const double sort_merge_cost=
+        get_merge_many_buffs_cost_fast(num_rows,
+                                       num_available_keys,
+                                       row_length);
+      /*
+        PQ has cost:
+        (insert + qsort) * log(queue size) / TIME_FOR_COMPARE_ROWID +
+        cost of file lookup afterwards.
+        The lookup cost is a bit pessimistic: we take scan_time and assume
+        that on average we find the row after scanning half of the file.
+        A better estimate would be lookup cost, but note that we are doing
+        random lookups here, rather than sequential scan.
+      */
+      const double pq_cpu_cost= 
+        (PQ_slowness * num_rows + param->max_keys_per_buffer) *
+        log((double) param->max_keys_per_buffer) / TIME_FOR_COMPARE_ROWID;
+      const double pq_io_cost=
+        param->max_rows * table->file->scan_time() / 2.0;
+      const double pq_cost= pq_cpu_cost + pq_io_cost;
+
+      if (sort_merge_cost < pq_cost)
+        DBUG_RETURN(false);
+
+      filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
+                                       param->sort_length + param->ref_length);
+      if (filesort_info->get_sort_keys())
+      {
+        // Make attached data to be references instead of fields.
+        my_free(filesort_info->addon_buf);
+        my_free(filesort_info->addon_field);
+        filesort_info->addon_buf= NULL;
+        filesort_info->addon_field= NULL;
+        param->addon_field= NULL;
+        param->addon_length= 0;
+
+        param->res_length= param->ref_length;
+        param->sort_length+= param->ref_length;
+        param->rec_length= param->sort_length;
+
+        DBUG_RETURN(true);
+      }
+    }
+  }
+  DBUG_RETURN(false);
+}
+
+
 /** Merge buffers to make < MERGEBUFF2 buffers. */
 
-int merge_many_buff(SORTPARAM *param, uchar *sort_buffer,
-		    BUFFPEK *buffpek, uint *maxbuffer, IO_CACHE *t_file)
+int merge_many_buff(Sort_param *param, uchar *sort_buffer,
+                    BUFFPEK *buffpek, uint *maxbuffer, IO_CACHE *t_file)
 {
   register uint i;
   IO_CACHE t_file2,*from_file,*to_file,*temp;
@@ -1213,7 +1441,7 @@ void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
     other  error
 */
 
-int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
+int merge_buffers(Sort_param *param, IO_CACHE *from_file,
                   IO_CACHE *to_file, uchar *sort_buffer,
                   BUFFPEK *lastbuff, BUFFPEK *Fb, BUFFPEK *Tb,
                   int flag)
@@ -1255,7 +1483,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
            (flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length);
   uint wr_len= flag ? res_length : rec_length;
   uint wr_offset= flag ? offset : 0;
-  maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1));
+  maxcount= (ulong) (param->max_keys_per_buffer/((uint) (Tb-Fb) +1));
   to_start_filepos= my_b_tell(to_file);
   strpos= sort_buffer;
   org_max_rows=max_rows= param->max_rows;
@@ -1396,7 +1624,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
   }
   buffpek= (BUFFPEK*) queue_top(&queue);
   buffpek->base= (uchar*) sort_buffer;
-  buffpek->max_keys= param->keys;
+  buffpek->max_keys= param->max_keys_per_buffer;
 
   /*
     As we know all entries in the buffer are unique, we only have to
@@ -1486,9 +1714,9 @@ err:
 
 	/* Do a merge to output-file (save only positions) */
 
-int merge_index(SORTPARAM *param, uchar *sort_buffer,
-		       BUFFPEK *buffpek, uint maxbuffer,
-		       IO_CACHE *tempfile, IO_CACHE *outfile)
+int merge_index(Sort_param *param, uchar *sort_buffer,
+		BUFFPEK *buffpek, uint maxbuffer,
+		IO_CACHE *tempfile, IO_CACHE *outfile)
 {
   DBUG_ENTER("merge_index");
   if (merge_buffers(param,tempfile,outfile,sort_buffer,buffpek,buffpek,
@@ -1534,7 +1762,7 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
            bool *multi_byte_charset)
 {
   reg2 uint length;
-  CHARSET_INFO *cs;
+  const CHARSET_INFO *cs;
   *multi_byte_charset= 0;
 
   length=0;
@@ -1635,7 +1863,8 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
 */
 
 static SORT_ADDON_FIELD *
-get_addon_fields(THD *thd, Field **ptabfield, uint sortlength, uint *plength)
+get_addon_fields(ulong max_length_for_sort_data,
+                 Field **ptabfield, uint sortlength, uint *plength)
 {
   Field **pfield;
   Field *field;
@@ -1672,7 +1901,7 @@ get_addon_fields(THD *thd, Field **ptabfield, uint sortlength, uint *plength)
     return 0;
   length+= (null_fields+7)/8;
 
-  if (length+sortlength > thd->variables.max_length_for_sort_data ||
+  if (length+sortlength > max_length_for_sort_data ||
       !(addonf= (SORT_ADDON_FIELD *) my_malloc(sizeof(SORT_ADDON_FIELD)*
                                                (fields+1), MYF(MY_WME))))
     return 0;
@@ -1755,7 +1984,7 @@ void change_double_for_sort(double nr,uchar *to)
   if (nr == 0.0)
   {						/* Change to zero string */
     tmp[0]=(uchar) 128;
-    bzero((char*) tmp+1,sizeof(nr)-1);
+    memset(tmp+1, 0, sizeof(nr)-1);
   }
   else
   {
diff --git a/sql/filesort.h b/sql/filesort.h
index 8ee8999d055..8960fa6cb66 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -29,10 +29,8 @@ typedef struct st_sort_field SORT_FIELD;
 ha_rows filesort(THD *thd, TABLE *table, st_sort_field *sortorder,
                  uint s_length, SQL_SELECT *select,
                  ha_rows max_rows, bool sort_positions,
-                 ha_rows *examined_rows);
+                 ha_rows *examined_rows, ha_rows *found_rows);
 void filesort_free_buffers(TABLE *table, bool full);
-double get_merge_many_buffs_cost(uint *buffer, uint last_n_elems,
-                                 int elem_size);
 void change_double_for_sort(double nr,uchar *to);
 
 #endif /* FILESORT_INCLUDED */
diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc
new file mode 100644
index 00000000000..059e8e3e68e
--- /dev/null
+++ b/sql/filesort_utils.cc
@@ -0,0 +1,140 @@
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "filesort_utils.h"
+#include "sql_const.h"
+#include "sql_sort.h"
+#include "table.h"
+#include "my_sys.h"
+
+
+namespace {
+/**
+  A local helper function. See comments for get_merge_buffers_cost().
+ */
+double get_merge_cost(ha_rows num_elements, ha_rows num_buffers, uint elem_size)
+{
+  return 
+    2.0 * ((double) num_elements * elem_size) / IO_SIZE
+    + (double) num_elements * log((double) num_buffers) /
+      (TIME_FOR_COMPARE_ROWID * M_LN2);
+}
+}
+
+/**
+  This is a simplified, and faster version of @see get_merge_many_buffs_cost().
+  We calculate the cost of merging buffers, by simulating the actions
+  of @see merge_many_buff. For explanations of formulas below,
+  see comments for get_merge_buffers_cost().
+  TODO: Use this function for Unique::get_use_cost().
+*/
+double get_merge_many_buffs_cost_fast(ha_rows num_rows,
+                                      ha_rows num_keys_per_buffer,
+                                      uint    elem_size)
+{
+  ha_rows num_buffers= num_rows / num_keys_per_buffer;
+  ha_rows last_n_elems= num_rows % num_keys_per_buffer;
+  double total_cost;
+
+  // Calculate CPU cost of sorting buffers.
+  total_cost=
+    ( num_buffers * num_keys_per_buffer * log(1.0 + num_keys_per_buffer) +
+      last_n_elems * log(1.0 + last_n_elems) )
+    / TIME_FOR_COMPARE_ROWID;
+  
+  // Simulate behavior of merge_many_buff().
+  while (num_buffers >= MERGEBUFF2)
+  {
+    // Calculate # of calls to merge_buffers().
+    const ha_rows loop_limit= num_buffers - MERGEBUFF*3/2;
+    const ha_rows num_merge_calls= 1 + loop_limit/MERGEBUFF;
+    const ha_rows num_remaining_buffs=
+      num_buffers - num_merge_calls * MERGEBUFF;
+
+    // Cost of merge sort 'num_merge_calls'.
+    total_cost+=
+      num_merge_calls *
+      get_merge_cost(num_keys_per_buffer * MERGEBUFF, MERGEBUFF, elem_size);
+
+    // # of records in remaining buffers.
+    last_n_elems+= num_remaining_buffs * num_keys_per_buffer;
+
+    // Cost of merge sort of remaining buffers.
+    total_cost+=
+      get_merge_cost(last_n_elems, 1 + num_remaining_buffs, elem_size);
+
+    num_buffers= num_merge_calls;
+    num_keys_per_buffer*= MERGEBUFF;
+  }
+
+  // Simulate final merge_buff call.
+  last_n_elems+= num_keys_per_buffer * num_buffers;
+  total_cost+= get_merge_cost(last_n_elems, 1 + num_buffers, elem_size);
+  return total_cost;
+}
+
+uchar **Filesort_buffer::alloc_sort_buffer(uint num_records, uint record_length)
+{
+  DBUG_ENTER("alloc_sort_buffer");
+
+  DBUG_EXECUTE_IF("alloc_sort_buffer_fail",
+                  DBUG_SET("+d,simulate_out_of_memory"););
+
+  if (m_idx_array.is_null())
+  {
+    uchar **sort_keys=
+      (uchar**) my_malloc(num_records * (record_length + sizeof(uchar*)),
+                          MYF(0));
+    m_idx_array= Idx_array(sort_keys, num_records);
+    m_record_length= record_length;
+    uchar **start_of_data= m_idx_array.array() + m_idx_array.size();
+    m_start_of_data= reinterpret_cast<uchar*>(start_of_data);
+  }
+  else
+  {
+    DBUG_ASSERT(num_records == m_idx_array.size());
+    DBUG_ASSERT(record_length == m_record_length);
+  }
+  DBUG_RETURN(m_idx_array.array());
+}
+
+
+void Filesort_buffer::free_sort_buffer()
+{
+  my_free(m_idx_array.array());
+  m_idx_array= Idx_array();
+  m_record_length= 0;
+  m_start_of_data= NULL;
+}
+
+
+void Filesort_buffer::sort_buffer(const Sort_param *param, uint count)
+{
+  if (count <= 1)
+    return;
+  uchar **keys= get_sort_keys();
+  uchar **buffer= NULL;
+  if (radixsort_is_appliccable(count, param->sort_length) &&
+      (buffer= (uchar**) my_malloc(count*sizeof(char*), MYF(0))))
+  {
+    radixsort_for_str_ptr(keys, count, param->sort_length, buffer);
+    my_free(buffer);
+    return;
+  }
+  
+  size_t size= param->sort_length;
+  my_qsort2(keys, count, sizeof(uchar*), get_ptr_compare(size), &size);
+}
+
diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h
new file mode 100644
index 00000000000..4cccf8ffa02
--- /dev/null
+++ b/sql/filesort_utils.h
@@ -0,0 +1,129 @@
+/* Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
+
+#ifndef FILESORT_UTILS_INCLUDED
+#define FILESORT_UTILS_INCLUDED
+
+#include "my_global.h"
+#include "my_base.h"
+#include "sql_array.h"
+
+class Sort_param;
+/*
+  Calculate cost of merge sort
+
+    @param num_rows            Total number of rows.
+    @param num_keys_per_buffer Number of keys per buffer.
+    @param elem_size           Size of each element.
+
+    Calculates cost of merge sort by simulating call to merge_many_buff().
+
+  @retval
+    Computed cost of merge sort in disk seeks.
+
+  @note
+    Declared here in order to be able to unit test it,
+    since library dependencies have not been sorted out yet.
+
+    See also comments get_merge_many_buffs_cost().
+*/
+
+double get_merge_many_buffs_cost_fast(ha_rows num_rows,
+                                      ha_rows num_keys_per_buffer,
+                                      uint    elem_size);
+
+
+/**
+  A wrapper class around the buffer used by filesort().
+  The buffer is a contiguous chunk of memory,
+  where the first part is <num_records> pointers to the actual data.
+
+  We wrap the buffer in order to be able to do lazy initialization of the
+  pointers: the buffer is often much larger than what we actually need.
+
+  The buffer must be kept available for multiple executions of the
+  same sort operation, so we have explicit allocate and free functions,
+  rather than doing alloc/free in CTOR/DTOR.
+*/
+class Filesort_buffer
+{
+public:
+  Filesort_buffer() :
+    m_idx_array(), m_record_length(0), m_start_of_data(NULL)
+  {}
+
+  /** Sort me... */
+  void sort_buffer(const Sort_param *param, uint count);
+
+  /// Initializes a record pointer.
+  uchar *get_record_buffer(uint idx)
+  {
+    m_idx_array[idx]= m_start_of_data + (idx * m_record_length);
+    return m_idx_array[idx];
+  }
+
+  /// Initializes all the record pointers.
+  void init_record_pointers()
+  {
+    for (uint ix= 0; ix < m_idx_array.size(); ++ix)
+      (void) get_record_buffer(ix);
+  }
+
+  /// Returns total size: pointer array + record buffers.
+  size_t sort_buffer_size() const
+  {
+    return m_idx_array.size() * (m_record_length + sizeof(uchar*));
+  }
+
+  /// Allocates the buffer, but does *not* initialize pointers.
+  uchar **alloc_sort_buffer(uint num_records, uint record_length);
+
+
+  /// Check  <num_records, record_length> for the buffer
+  bool check_sort_buffer_properties(uint num_records,  uint record_length)
+  {
+    return (static_cast<uint>(m_idx_array.size()) == num_records &&
+            m_record_length == m_record_length);
+  }
+
+  /// Frees the buffer.
+  void free_sort_buffer();
+
+  /// Getter, for calling routines which still use the uchar** interface.
+  uchar **get_sort_keys() { return m_idx_array.array(); }
+
+  /**
+    We need an assignment operator, see filesort().
+    This happens to have the same semantics as the one that would be
+    generated by the compiler. We still implement it here, to show shallow
+    assignment explicitly: we have two objects sharing the same array.
+  */
+  Filesort_buffer &operator=(const Filesort_buffer &rhs)
+  {
+    m_idx_array= rhs.m_idx_array;
+    m_record_length= rhs.m_record_length;
+    m_start_of_data= rhs.m_start_of_data;
+    return *this;
+  }
+
+private:
+  typedef Bounds_checked_array<uchar*> Idx_array;
+
+  Idx_array  m_idx_array;
+  uint       m_record_length;
+  uchar     *m_start_of_data;
+};
+
+#endif  // FILESORT_UTILS_INCLUDED
diff --git a/sql/sql_array.h b/sql/sql_array.h
index 67f1f1c2ad7..98a4a4815af 100644
--- a/sql/sql_array.h
+++ b/sql/sql_array.h
@@ -19,6 +19,77 @@
 
 #include <my_sys.h>
 
+/**
+   A wrapper class which provides array bounds checking.
+   We do *not* own the array, we simply have a pointer to the first element,
+   and a length.
+
+   @remark
+   We want the compiler-generated versions of:
+   - the copy CTOR (memberwise initialization)
+   - the assignment operator (memberwise assignment)
+
+   @param Element_type The type of the elements of the container.
+ */
+template <typename Element_type> class Bounds_checked_array
+{
+public:
+  Bounds_checked_array() : m_array(NULL), m_size(0) {}
+
+  Bounds_checked_array(Element_type *el, size_t size)
+    : m_array(el), m_size(size)
+  {}
+
+  void reset() { m_array= NULL; m_size= 0; }
+ 
+  void reset(Element_type *array, size_t size)
+  {
+    m_array= array;
+    m_size= size;
+  }
+
+  /**
+    Set a new bound on the array. Does not resize the underlying
+    array, so the new size must be smaller than or equal to the
+    current size.
+   */
+  void resize(size_t new_size)
+  {
+    DBUG_ASSERT(new_size <= m_size);
+    m_size= new_size;
+  }
+
+  Element_type &operator[](size_t n)
+  {
+    DBUG_ASSERT(n < m_size);
+    return m_array[n];
+  }
+
+  const Element_type &operator[](size_t n) const
+  {
+    DBUG_ASSERT(n < m_size);
+    return m_array[n];
+  }
+
+  size_t element_size() const { return sizeof(Element_type); }
+  size_t size() const         { return m_size; }
+
+  bool is_null() const { return m_array == NULL; }
+
+  void pop_front()
+  {
+    DBUG_ASSERT(m_size > 0);
+    m_array+= 1;
+    m_size-= 1;
+  }
+
+  Element_type *array() const { return m_array; }
+
+private:
+  Element_type *m_array;
+  size_t        m_size;
+};
+
 /*
   A typesafe wrapper around DYNAMIC_ARRAY
 */
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 5128b1284dd..e590759ec2f 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -244,6 +244,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
     uint         length= 0;
     SORT_FIELD  *sortorder;
     ha_rows examined_rows;
+    ha_rows found_rows;
     
     table->update_const_key_parts(conds);
     order= simple_remove_const(order, conds);
@@ -264,9 +265,10 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
                                                    MYF(MY_FAE | MY_ZEROFILL));
     
       if (!(sortorder= make_unireg_sortorder(order, &length, NULL)) ||
-	  (table->sort.found_records = filesort(thd, table, sortorder, length,
-                                                select, HA_POS_ERROR, 1,
-                                                &examined_rows))
+	  (table->sort.found_records= filesort(thd, table, sortorder, length,
+                                               select, HA_POS_ERROR,
+                                               true,
+                                               &examined_rows, &found_rows))
 	  == HA_POS_ERROR)
       {
         delete select;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 0c1fb07d761..f79aa503cbc 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1435,9 +1435,10 @@ JOIN::optimize()
         We have found that grouping can be removed since groups correspond to
         only one row anyway, but we still have to guarantee correct result
         order. The line below effectively rewrites the query from GROUP BY
-        <fields> to ORDER BY <fields>. There are two exceptions:
+        <fields> to ORDER BY <fields>. There are three exceptions:
         - if skip_sort_order is set (see above), then we can simply skip
           GROUP BY;
+        - if we are in a subquery, we don't have to maintain order
         - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
           with the GROUP BY ones, i.e. either one is a prefix of another.
           We only check if the ORDER BY is a prefix of GROUP BY. In this case
@@ -1447,7 +1448,13 @@ JOIN::optimize()
           'order' as is.
        */
       if (!order || test_if_subpart(group_list, order))
-          order= skip_sort_order ? 0 : group_list;
+      {
+        if (skip_sort_order ||
+            select_lex->master_unit()->item) // This is a subquery
+          order= NULL;
+        else
+          order= group_list;
+      }
       /*
         If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be 
         rewritten to IGNORE INDEX FOR ORDER BY(fields).
@@ -1853,6 +1860,7 @@ int JOIN::init_execution()
 
       if (!group_list && ! exec_tmp_table1->distinct && order && simple_order)
       {
+        DBUG_PRINT("info",("Sorting for order"));
         thd_proc_info(thd, "Sorting for order");
         if (create_sort_index(thd, this, order,
                               HA_POS_ERROR, HA_POS_ERROR, TRUE))
@@ -2177,6 +2185,8 @@ JOIN::exec()
   int      tmp_error;
   DBUG_ENTER("JOIN::exec");
 
+  const bool has_group_by= this->group;
+
   thd_proc_info(thd, "executing");
   error= 0;
   if (procedure)
@@ -2515,11 +2525,12 @@ JOIN::exec()
       }
       if (curr_join->group_list)
       {
-	thd_proc_info(thd, "Creating sort index");
 	if (curr_join->join_tab == join_tab && save_join_tab())
 	{
 	  DBUG_VOID_RETURN;
 	}
+	DBUG_PRINT("info",("Sorting for index"));
+	thd_proc_info(thd, "Creating sort index");
 	if (create_sort_index(thd, curr_join, curr_join->group_list,
 			      HA_POS_ERROR, HA_POS_ERROR, FALSE) ||
 	    make_group_fields(this, curr_join))
@@ -2785,13 +2796,39 @@ JOIN::exec()
 	the query. XXX: it's never shown in EXPLAIN!
 	OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
       */
-      if (create_sort_index(thd, curr_join,
-			    curr_join->group_list ? 
-			    curr_join->group_list : curr_join->order,
-			    curr_join->select_limit,
-			    (select_options & OPTION_FOUND_ROWS ?
-			     HA_POS_ERROR : unit->select_limit_cnt),
-                            curr_join->group_list ? TRUE : FALSE))
+      DBUG_PRINT("info",("Sorting for order by/group by"));
+      ORDER *order_arg=
+        curr_join->group_list ? curr_join->group_list : curr_join->order;
+      /*
+        filesort_limit:	 Return only this many rows from filesort().
+        We can use select_limit_cnt only if we have no group_by and 1 table.
+        This allows us to use Bounded_queue for queries like:
+          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
+        select_limit == HA_POS_ERROR (we need a full table scan)
+        unit->select_limit_cnt == 1 (we only need one row in the result set)
+       */
+      const ha_rows filesort_limit_arg=
+        (has_group_by || curr_join->table_count > 1)
+        ? curr_join->select_limit : unit->select_limit_cnt;
+      const ha_rows select_limit_arg=
+        select_options & OPTION_FOUND_ROWS
+        ? HA_POS_ERROR : unit->select_limit_cnt;
+
+      DBUG_PRINT("info", ("has_group_by %d "
+                          "curr_join->table_count %d "
+                          "curr_join->m_select_limit %d "
+                          "unit->select_limit_cnt %d",
+                          has_group_by,
+                          curr_join->table_count,
+                          (int) curr_join->select_limit,
+                          (int) unit->select_limit_cnt));
+
+      if (create_sort_index(thd,
+                            curr_join,
+                            order_arg,
+                            filesort_limit_arg,
+                            select_limit_arg,
+                            curr_join->group_list ? FALSE : TRUE))
 	DBUG_VOID_RETURN;
       sortorder= curr_join->sortorder;
       if (curr_join->const_tables != curr_join->table_count &&
@@ -2823,6 +2860,14 @@ JOIN::exec()
                                    Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
   error= do_select(curr_join, curr_fields_list, NULL, procedure);
   thd->limit_found_rows= curr_join->send_records;
+  if (curr_join->order &&
+      curr_join->sortorder)
+  {
+    /* Use info provided by filesort. */
+    DBUG_ASSERT(curr_join->table_count > curr_join->const_tables);
+    JOIN_TAB *tab= curr_join->join_tab + curr_join->const_tables;
+    thd->limit_found_rows= tab->records;
+  }
 
   /* Accumulate the counts from all join iterations of all join parts. */
   thd->examined_row_count+= curr_join->examined_rows;
@@ -17163,7 +17208,25 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       if ((error= join->result->send_data(*join->fields)))
         DBUG_RETURN(error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR);
     }
-    if (++join->send_records >= join->unit->select_limit_cnt &&
+
+    ++join->send_records;
+    if (join->send_records >= join->unit->select_limit_cnt &&
+        !join->do_send_rows)
+    {
+      /*
+        If filesort is used for sorting, stop after select_limit_cnt+1
+        records are read. Because of optimization in some cases it can
+        provide only select_limit_cnt+1 records.
+      */
+      if (join->order &&
+          join->sortorder &&
+          join->select_options & OPTION_FOUND_ROWS)
+      {
+        DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT"));
+        DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
+      }
+    }
+    if (join->send_records >= join->unit->select_limit_cnt &&
 	join->do_send_rows)
     {
       if (join->select_options & OPTION_FOUND_ROWS)
@@ -18848,6 +18911,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
 {
   uint length= 0;
   ha_rows examined_rows;
+  ha_rows found_rows;
+  ha_rows filesort_retval= HA_POS_ERROR;
   TABLE *table;
   SQL_SELECT *select;
   JOIN_TAB *tab;
@@ -18925,10 +18990,11 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
     goto err;
   if (table->s->tmp_table)
     table->file->info(HA_STATUS_VARIABLE);	// Get record count
-  table->sort.found_records=filesort(thd, table,join->sortorder, length,
-                                     select, filesort_limit, 0,
-                                     &examined_rows);
-  tab->records= table->sort.found_records;	// For SQL_CALC_ROWS
+  filesort_retval= filesort(thd, table, join->sortorder, length,
+                            select, filesort_limit, 0,
+                            &examined_rows, &found_rows);
+  table->sort.found_records= filesort_retval;
+  tab->records= found_rows;                     // For SQL_CALC_ROWS
   if (select)
   {
     /*
@@ -18957,7 +19023,7 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
   tab->read_first_record= join_init_read_record;
   tab->join->examined_rows+=examined_rows;
   table->disable_keyread(); // Restore if we used indexes
-  DBUG_RETURN(table->sort.found_records == HA_POS_ERROR);
+  DBUG_RETURN(filesort_retval == HA_POS_ERROR);
 err:
   DBUG_RETURN(-1);
 }
@@ -19288,7 +19354,7 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
   pos= sort= sortorder;
 
   if (!pos)
-    return 0;
+    DBUG_RETURN(0);
 
   for (;order;order=order->next,pos++)
   {
@@ -19305,6 +19371,7 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
     else
       pos->item= *order->item;
     pos->reverse=! order->asc;
+    DBUG_ASSERT(pos->field != NULL || pos->item != NULL);
   }
   *length=count;
   DBUG_RETURN(sort);
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index f1a3a2f9d8b..d30ddfb6eec 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -16,6 +16,7 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
 
+#include "m_string.h"                           /* memset */
 #include "my_global.h"                          /* uchar */
 #include "my_base.h"                            /* ha_rows */
 #include "my_sys.h"                             /* qsort2_cmp */
@@ -27,7 +28,6 @@ typedef struct st_sort_field SORT_FIELD;
 class Field;
 struct TABLE;
 
-
 /* Defines used by filesort and uniques */
 
 #define MERGEBUFF		7
@@ -65,41 +65,51 @@ struct BUFFPEK_COMPARE_CONTEXT
   void *key_compare_arg;
 };
 
-typedef struct st_sort_param {
-  uint rec_length;          /* Length of sorted records */
-  uint sort_length;			/* Length of sorted columns */
-  uint ref_length;			/* Length of record ref. */
-  uint addon_length;        /* Length of added packed fields */
-  uint res_length;          /* Length of records in final sorted file/buffer */
-  uint keys;				/* Max keys / buffer */
+
+class Sort_param {
+public:
+  uint rec_length;            // Length of sorted records.
+  uint sort_length;           // Length of sorted columns.
+  uint ref_length;            // Length of record ref.
+  uint addon_length;          // Length of added packed fields.
+  uint res_length;            // Length of records in final sorted file/buffer.
+  uint max_keys_per_buffer;   // Max keys / buffer.
   uint min_dupl_count;
-  ha_rows max_rows,examined_rows;
-  TABLE *sort_form;			/* For quicker make_sortkey */
+  ha_rows max_rows;           // Select limit, or HA_POS_ERROR if unlimited.
+  ha_rows examined_rows;      // Number of examined rows.
+  TABLE *sort_form;           // For quicker make_sortkey.
   SORT_FIELD *local_sortorder;
   SORT_FIELD *end;
-  SORT_ADDON_FIELD *addon_field; /* Descriptors for companion fields */
+  SORT_ADDON_FIELD *addon_field; // Descriptors for companion fields.
   uchar *unique_buff;
   bool not_killable;
   char* tmp_buffer;
-  /* The fields below are used only by Unique class */
+  // The fields below are used only by Unique class.
   qsort2_cmp compare;
   BUFFPEK_COMPARE_CONTEXT cmp_context;
-} SORTPARAM;
 
+  Sort_param()
+  {
+    memset(this, 0, sizeof(*this));
+  }
+  void init_for_filesort(uint sortlen, TABLE *table,
+                         ulong max_length_for_sort_data,
+                         ha_rows maxrows, bool sort_positions);
+};
 
-int merge_many_buff(SORTPARAM *param, uchar *sort_buffer,
+
+int merge_many_buff(Sort_param *param, uchar *sort_buffer,
 		    BUFFPEK *buffpek,
 		    uint *maxbuffer, IO_CACHE *t_file);
 uint read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek,
 		    uint sort_length);
-int merge_buffers(SORTPARAM *param,IO_CACHE *from_file,
-		  IO_CACHE *to_file, uchar *sort_buffer,
-		  BUFFPEK *lastbuff,BUFFPEK *Fb,
-		  BUFFPEK *Tb,int flag);
-int merge_index(SORTPARAM *param, uchar *sort_buffer,
+int merge_buffers(Sort_param *param,IO_CACHE *from_file,
+                  IO_CACHE *to_file, uchar *sort_buffer,
+                  BUFFPEK *lastbuff,BUFFPEK *Fb,
+                  BUFFPEK *Tb,int flag);
+int merge_index(Sort_param *param, uchar *sort_buffer,
 		BUFFPEK *buffpek, uint maxbuffer,
 		IO_CACHE *tempfile, IO_CACHE *outfile);
-
 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length);
 
 #endif /* SQL_SORT_INCLUDED */
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 031932b4c06..13f3d7b1ae8 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -7201,6 +7201,7 @@ copy_data_between_tables(THD *thd, TABLE *from,TABLE *to,
   List<Item>   fields;
   List<Item>   all_fields;
   ha_rows examined_rows;
+  ha_rows found_rows;
   bool auto_increment_field_copied= 0;
   ulonglong save_sql_mode= thd->variables.sql_mode;
   ulonglong prev_insert_id, time_to_report_progress;
@@ -7284,8 +7285,9 @@ copy_data_between_tables(THD *thd, TABLE *from,TABLE *to,
                       &tables, fields, all_fields, order) ||
           !(sortorder= make_unireg_sortorder(order, &length, NULL)) ||
           (from->sort.found_records= filesort(thd, from, sortorder, length,
-                                              (SQL_SELECT *) 0, HA_POS_ERROR,
-                                              1, &examined_rows)) ==
+                                              NULL, HA_POS_ERROR,
+                                              true,
+                                              &examined_rows, &found_rows)) ==
           HA_POS_ERROR)
         goto err;
     }
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index f2b6c5c9f92..bf261bffec3 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -498,13 +498,15 @@ int mysql_update(THD *thd,
       uint         length= 0;
       SORT_FIELD  *sortorder;
       ha_rows examined_rows;
+      ha_rows found_rows;
 
       table->sort.io_cache = (IO_CACHE *) my_malloc(sizeof(IO_CACHE),
 						    MYF(MY_FAE | MY_ZEROFILL));
       if (!(sortorder=make_unireg_sortorder(order, &length, NULL)) ||
           (table->sort.found_records= filesort(thd, table, sortorder, length,
-                                               select, limit, 1,
-                                               &examined_rows))
+                                               select, limit,
+                                               true,
+                                               &examined_rows, &found_rows))
           == HA_POS_ERROR)
       {
 	goto err;
diff --git a/sql/table.h b/sql/table.h
index 87affe984fc..fc451ff5050 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -29,6 +29,7 @@
 #include "handler.h"                /* row_type, ha_choice, handler */
 #include "mysql_com.h"              /* enum_field_types */
 #include "thr_lock.h"                  /* thr_lock_type */
+#include "filesort_utils.h"
 
 /* Structs that defines the TABLE */
 
@@ -299,11 +300,14 @@ enum tmp_table_type
 };
 enum release_type { RELEASE_NORMAL, RELEASE_WAIT_FOR_DROP };
 
-typedef struct st_filesort_info
+
+class Filesort_info
 {
+  /// Buffer for sorting keys.
+  Filesort_buffer filesort_buffer;
+
+public:
   IO_CACHE *io_cache;           /* If sorted through filesort */
-  uchar     **sort_keys;        /* Buffer for sorting keys */
-  uint      keys;               /* Number of key pointers in buffer */
   uchar     *buffpek;           /* Buffer for buffpek structures */
   uint      buffpek_len;        /* Max number of buffpeks in the buffer */
   uchar     *addon_buf;         /* Pointer to a buffer if sorted with fields */
@@ -312,7 +316,38 @@ typedef struct st_filesort_info
   void    (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); /* To unpack back */
   uchar     *record_pointers;    /* If sorted in memory */
   ha_rows   found_records;      /* How many records in sort */
-} FILESORT_INFO;
+
+  /** Sort filesort_buffer */
+  void sort_buffer(Sort_param *param, uint count)
+  { filesort_buffer.sort_buffer(param, count); }
+
+  /**
+     Accessors for Filesort_buffer (which @c).
+  */
+  uchar *get_record_buffer(uint idx)
+  { return filesort_buffer.get_record_buffer(idx); }
+
+  uchar **get_sort_keys()
+  { return filesort_buffer.get_sort_keys(); }
+
+  uchar **alloc_sort_buffer(uint num_records, uint record_length)
+  { return filesort_buffer.alloc_sort_buffer(num_records, record_length); }
+
+  bool check_sort_buffer_properties(uint num_records, uint record_length)
+  {
+    return filesort_buffer.check_sort_buffer_properties(num_records,
+                                                        record_length);
+  }
+
+  void free_sort_buffer()
+  { filesort_buffer.free_sort_buffer(); }
+
+  void init_record_pointers()
+  { filesort_buffer.init_record_pointers(); }
+
+  size_t sort_buffer_size() const
+  { return filesort_buffer.sort_buffer_size(); }
+};
 
 
 /*
@@ -1136,7 +1171,7 @@ public:
   REGINFO reginfo;			/* field connections */
   MEM_ROOT mem_root;
   GRANT_INFO grant;
-  FILESORT_INFO sort;
+  Filesort_info sort;
   /*
     The arena which the items for expressions from the table definition
     are associated with.  
diff --git a/sql/uniques.cc b/sql/uniques.cc
index ae50a1d3970..c246cd637bd 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -620,7 +620,6 @@ bool Unique::walk(tree_walk_action action, void *walk_action_arg)
 
 bool Unique::get(TABLE *table)
 {
-  SORTPARAM sort_param;
   table->sort.found_records=elements+tree.elements_in_tree;
   if (my_b_tell(&file) == 0)
   {
@@ -660,6 +659,7 @@ bool Unique::get(TABLE *table)
     return 1;
   reinit_io_cache(outfile,WRITE_CACHE,0L,0,0);
 
+  Sort_param sort_param; 
   bzero((char*) &sort_param,sizeof(sort_param));
   sort_param.max_rows= elements;
   sort_param.sort_form=table;
@@ -667,14 +667,15 @@ bool Unique::get(TABLE *table)
    full_size;
   sort_param.min_dupl_count= min_dupl_count;
   sort_param.res_length= 0;
-  sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length);
+  sort_param.max_keys_per_buffer= 
+    (uint) (max_in_memory_size / sort_param.sort_length);
   sort_param.not_killable=1;
 
-  if (!(sort_buffer=(uchar*) my_malloc((sort_param.keys+1) *
+  if (!(sort_buffer=(uchar*) my_malloc((sort_param.max_keys_per_buffer+1) *
 				       sort_param.sort_length,
 				       MYF(0))))
     return 1;
-  sort_param.unique_buff= sort_buffer+(sort_param.keys*
+  sort_param.unique_buff= sort_buffer+(sort_param.max_keys_per_buffer *
 				       sort_param.sort_length);
 
   sort_param.compare= (qsort2_cmp) buffpek_compare;
author	Igor Babaev <igor@askmonty.org>	2012-09-01 14:21:59 -0700
committer	Igor Babaev <igor@askmonty.org>	2012-09-01 14:21:59 -0700
commit	a6b88f1431238152643e41979ce10b9bbdac2a82 (patch)
tree	42482ef66f3c59b255d299fc58ace8c1b0c518ea /sql
parent	5a86a61219826aadf8d08cbc447fe438f2bf50c3 (diff)
download	mariadb-git-a6b88f1431238152643e41979ce10b9bbdac2a82.tar.gz