51 files changed, 6288 insertions, 2515 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 6136c39fc9c..062e59bee5b 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -123,7 +123,7 @@ SET (SQL_SOURCE
                sql_profile.cc event_parse_data.cc sql_alter.cc
                sql_signal.cc rpl_handler.cc mdl.cc sql_admin.cc
                transaction.cc sys_vars.cc sql_truncate.cc datadict.cc
-               sql_reload.cc sql_cmd.h item_inetfunc.cc
+               sql_reload.cc sql_cmd.h item_inetfunc.cc 
 
                # added in MariaDB:
                sql_explain.h sql_explain.cc
@@ -138,6 +138,7 @@ SET (SQL_SOURCE
                my_json_writer.cc my_json_writer.h
                rpl_gtid.cc rpl_parallel.cc
                sql_type.cc sql_type.h
+               item_windowfunc.cc sql_window.cc
 	       sql_cte.cc sql_cte.h
 	       ${WSREP_SOURCES}
                table_cache.cc encryption.cc
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 54a79421d2e..78b74380f7e 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -74,7 +74,6 @@ static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info,
                                    TABLE *table,
                                    ha_rows records, ulong memory_available);
 
-
 void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
                                    ulong max_length_for_sort_data,
                                    ha_rows maxrows, bool sort_positions)
@@ -124,14 +123,15 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
 
   @param      thd            Current thread
   @param      table          Table to sort
-  @param      sortorder      How to sort the table
-  @param      s_length       Number of elements in sortorder
-  @param      select         Condition to apply to the rows
-  @param      max_rows       Return only this many rows
+  @param      filesort       How to sort the table
   @param      sort_positions Set to TRUE if we want to force sorting by
 			     position
                              (Needed by UPDATE/INSERT or ALTER TABLE or
                               when rowids are required by executor)
+  @param      tracker        Filesort_tracker object supplied by the caller
+  @param[out] found_rows     Store the number of found rows here.
+                             This is the number of found rows after
+                             applying WHERE condition.
   @note
     If we sort by position (like if sort_positions is 1) filesort() will
     call table->prepare_for_position().
@@ -141,11 +141,9 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
     #			SORT_INFO
 */
 
-SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder,
-                        uint s_length,
-                        SQL_SELECT *select, ha_rows max_rows,
-                        bool sort_positions,
-                        Filesort_tracker* tracker)
+SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
+                    bool sort_positions,
+                    Filesort_tracker* tracker)
 {
   int error;
   size_t memory_available= thd->variables.sortbuff_size;
@@ -156,9 +154,16 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder,
   Sort_param param;
   bool multi_byte_charset;
   Bounded_queue<uchar, uchar> pq;
+  SQL_SELECT *const select= filesort->select;
+  ha_rows max_rows= filesort->limit;
+  uint s_length= 0;
 
   DBUG_ENTER("filesort");
-  DBUG_EXECUTE("info",TEST_filesort(sortorder,s_length););
+
+  if (!(s_length= filesort->make_sortorder(thd)))
+    DBUG_RETURN(NULL);  /* purecov: inspected */
+
+  DBUG_EXECUTE("info",TEST_filesort(filesort->sortorder,s_length););
 #ifdef SKIP_DBUG_IN_FILESORT
   DBUG_PUSH("");		/* No DBUG here */
 #endif
@@ -194,7 +199,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder,
   error= 1;
   sort->found_rows= HA_POS_ERROR;
 
-  param.init_for_filesort(sortlength(thd, sortorder, s_length,
+  param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length,
                                      &multi_byte_charset),
                           table,
                           thd->variables.max_length_for_sort_data,
@@ -275,7 +280,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder,
     goto err;
 
   param.sort_form= table;
-  param.end=(param.local_sortorder=sortorder)+s_length;
+  param.end=(param.local_sortorder=filesort->sortorder)+s_length;
   num_rows= find_all_keys(thd, &param, select,
                           sort,
                           &buffpek_pointers,
@@ -429,6 +434,55 @@ SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder,
 } /* filesort */
 
 
+void Filesort::cleanup()
+{
+  /* Only a select owned by this object is cleaned up and detached. */
+  if (!select || !own_select)
+    return;
+  select->cleanup();
+  select= NULL;
+}
+
+
+uint Filesort::make_sortorder(THD *thd)
+{
+  uint count;
+  SORT_FIELD *sort,*pos;
+  ORDER *ord;
+  DBUG_ENTER("make_sortorder");
+  /* Fills sortorder[] from the ORDER list; allocates it if not yet set. */
+  /* Returns the number of ORDER elements, or 0 if there is no array.    */
+  count=0;
+  for (ord = order; ord; ord= ord->next)
+    count++;
+  if (!sortorder)
+    sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) * (count + 1));
+  pos= sort= sortorder;
+
+  if (!pos)
+    DBUG_RETURN(0);
+  /* Each entry gets either a Field or an Item to sort by, never neither. */
+  for (ord= order; ord; ord= ord->next, pos++)
+  {
+    Item *item= ord->item[0]->real_item();
+    pos->field= 0; pos->item= 0;
+    if (item->type() == Item::FIELD_ITEM)
+      pos->field= ((Item_field*) item)->field;
+    else if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item())
+      pos->field= ((Item_sum*) item)->get_tmp_table_field();
+    else if (item->type() == Item::COPY_STR_ITEM)
+    {						// Blob patch
+      pos->item= ((Item_copy*) item)->get_item();
+    }
+    else
+      pos->item= *ord->item;
+    pos->reverse= (ord->direction == ORDER::ORDER_DESC);
+    DBUG_ASSERT(pos->field != NULL || pos->item != NULL);
+  }
+  DBUG_RETURN(count);
+  }
+
+
 /** Read 'count' number of buffer pointers into memory. */
 
 static uchar *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count,
diff --git a/sql/filesort.h b/sql/filesort.h
index 454c745b5c0..6d665dbe0aa 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -17,13 +17,58 @@
 #define FILESORT_INCLUDED
 
 #include "my_base.h"                            /* ha_rows */
+#include "sql_list.h"                           /* Sql_alloc */
 #include "filesort_utils.h"
 
 class SQL_SELECT;
 class THD;
 struct TABLE;
-struct SORT_FIELD;
 class Filesort_tracker;
+struct SORT_FIELD;
+typedef struct st_order ORDER;
+ 
+
+/**
+  Sorting related info.
+  To be extended by another WL to include complete filesort implementation.
+*/
+class Filesort: public Sql_alloc
+{
+public:
+  /** List of expressions to order the table by */
+  ORDER *order;
+  /** Number of records to return */
+  ha_rows limit;
+  /** ORDER BY list with some precalculated info for filesort */
+  SORT_FIELD *sortorder;
+  /** select to use for getting records */
+  SQL_SELECT *select;
+  /** TRUE <=> select->cleanup() is called on destruction */
+  bool own_select;
+  /** true means we are using Priority Queue for order by with limit. */
+  bool using_pq;
+  /** Tracker for this sort; the constructor sets it to NULL */
+  Filesort_tracker *tracker;
+
+  Filesort(ORDER *order_arg, ha_rows limit_arg, SQL_SELECT *select_arg):
+    order(order_arg),
+    limit(limit_arg),
+    sortorder(NULL),
+    select(select_arg),
+    own_select(false),
+    using_pq(false),
+    tracker(NULL)
+  {
+    DBUG_ASSERT(order);
+  }
+  ~Filesort() { cleanup(); }
+  /* Prepare ORDER BY list for sorting. */
+  uint make_sortorder(THD *thd);
+
+private:
+  void cleanup();
+};
+
 
 class SORT_INFO
 {
@@ -97,19 +142,14 @@ public:
   size_t sort_buffer_size() const
   { return filesort_buffer.sort_buffer_size(); }
 
-  friend SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder,
-                                 uint s_length,
-                                 SQL_SELECT *select, ha_rows max_rows,
-                                 bool sort_positions,
-                                 Filesort_tracker* tracker);
+  friend SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
+                             bool sort_positions,
+                             Filesort_tracker* tracker);
 };
 
-
-SORT_INFO *filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder,
-                        uint s_length,
-                        SQL_SELECT *select, ha_rows max_rows,
-                        bool sort_positions,
-                        Filesort_tracker* tracker);
+SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
+                    bool sort_positions,
+                    Filesort_tracker* tracker);
 
 void change_double_for_sort(double nr,uchar *to);
 
diff --git a/sql/item.cc b/sql/item.cc
index 7cdb2d2e7e4..4d3a3a6e3e7 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -455,7 +455,7 @@ Item::Item(THD *thd):
 {
   DBUG_ASSERT(thd);
   marker= 0;
-  maybe_null=null_value=with_sum_func=with_field=0;
+  maybe_null=null_value=with_sum_func=with_window_func=with_field=0;
   in_rollup= 0;
   with_subselect= 0;
    /* Initially this item is not attached to any JOIN_TAB. */
@@ -500,6 +500,7 @@ Item::Item(THD *thd, Item *item):
   in_rollup(item->in_rollup),
   null_value(item->null_value),
   with_sum_func(item->with_sum_func),
+  with_window_func(item->with_window_func),
   with_field(item->with_field),
   fixed(item->fixed),
   is_autogenerated_name(item->is_autogenerated_name),
@@ -1749,7 +1750,7 @@ public:
     thd->fatal_error() may be called if we are out of memory
 */
 
-void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
+void Item::split_sum_func2(THD *thd, Ref_ptr_array ref_pointer_array,
                            List<Item> &fields, Item **ref, 
                            uint split_flags)
 {
@@ -1813,7 +1814,7 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
   if (!(item_ref= (new (thd->mem_root)
                    Item_aggregate_ref(thd,
                                       &thd->lex->current_select->context,
-                                      ref_pointer_array + el, 0, name))))
+				      &ref_pointer_array[el], 0, name))))
     return;                                   // fatal_error is set
   if (type() == SUM_FUNC_ITEM)
     item_ref->depended_from= ((Item_sum *) this)->depended_from(); 
@@ -3755,16 +3756,18 @@ Item_param::set_value(THD *thd, sp_rcontext *ctx, Item **it)
                       str_value.charset());
     collation.set(str_value.charset(), DERIVATION_COERCIBLE);
     decimals= 0;
-
+    item_type= Item::STRING_ITEM;
     break;
   }
 
   case REAL_RESULT:
     set_double(arg->val_real());
+    item_type= Item::REAL_ITEM;
     break;
 
   case INT_RESULT:
     set_int(arg->val_int(), arg->max_length);
+    item_type= Item::INT_ITEM;
     break;
 
   case DECIMAL_RESULT:
@@ -3776,6 +3779,7 @@ Item_param::set_value(THD *thd, sp_rcontext *ctx, Item **it)
       return TRUE;
 
     set_decimal(dv);
+    item_type= Item::DECIMAL_ITEM;
     break;
   }
 
@@ -3785,11 +3789,11 @@ Item_param::set_value(THD *thd, sp_rcontext *ctx, Item **it)
     DBUG_ASSERT(TRUE);  // Abort in debug mode.
 
     set_null();         // Set to NULL in release mode.
+    item_type= Item::NULL_ITEM;
     return FALSE;
   }
 
   set_handler_by_result_type(arg->result_type());
-  item_type= arg->type();
   return FALSE;
 }
 
@@ -4489,7 +4493,7 @@ resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, SELECT_LEX *select)
         return NULL;
       }
       DBUG_ASSERT((*select_ref)->fixed);
-      return (select->ref_pointer_array + counter);
+      return &select->ref_pointer_array[counter];
     }
     if (group_by_ref)
       return group_by_ref;
@@ -6516,15 +6520,14 @@ Item *Item_field::update_value_transformer(THD *thd, uchar *select_arg)
       type() != Item::TRIGGER_FIELD_ITEM)
   {
     List<Item> *all_fields= &select->join->all_fields;
-    Item **ref_pointer_array= select->ref_pointer_array;
-    DBUG_ASSERT(all_fields->elements <= select->ref_pointer_array_size);
+    Ref_ptr_array &ref_pointer_array= select->ref_pointer_array;
     int el= all_fields->elements;
     Item_ref *ref;
 
     ref_pointer_array[el]= (Item*)this;
     all_fields->push_front((Item*)this, thd->mem_root);
     ref= new (thd->mem_root)
-      Item_ref(thd, &select->context, ref_pointer_array + el,
+      Item_ref(thd, &select->context, &ref_pointer_array[el],
                table_name, field_name);
     return ref;
   }
@@ -6928,6 +6931,7 @@ void Item_ref::set_properties()
     split_sum_func() doesn't try to change the reference.
   */
   with_sum_func= (*ref)->with_sum_func;
+  with_window_func= (*ref)->with_window_func;
   with_field= (*ref)->with_field;
   fixed= 1;
   if (alias_name_used)
diff --git a/sql/item.h b/sql/item.h
index d67a29dee5a..863265a73f7 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -65,6 +65,8 @@ class RANGE_OPT_PARAM;
 class SEL_TREE;
 
 
+typedef Bounds_checked_array<Item*> Ref_ptr_array;
+
 static inline uint32
 char_to_byte_length_safe(uint32 char_length_arg, uint32 mbmaxlen_arg)
 {
@@ -626,7 +628,8 @@ public:
   static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
   static void operator delete(void *ptr, MEM_ROOT *mem_root) {}
 
-  enum Type {FIELD_ITEM= 0, FUNC_ITEM, SUM_FUNC_ITEM, STRING_ITEM,
+  enum Type {FIELD_ITEM= 0, FUNC_ITEM, SUM_FUNC_ITEM,
+             WINDOW_FUNC_ITEM, STRING_ITEM,
 	     INT_ITEM, REAL_ITEM, NULL_ITEM, VARBIN_ITEM,
 	     COPY_STR_ITEM, FIELD_AVG_ITEM, DEFAULT_VALUE_ITEM,
 	     PROC_ITEM,COND_ITEM, REF_ITEM, FIELD_STD_ITEM,
@@ -692,6 +695,7 @@ public:
                                            of a query with ROLLUP */ 
   bool null_value;			/* if item is null */
   bool with_sum_func;                   /* True if item contains a sum func */
+  bool with_window_func;             /* True if item contains a window func */
   /**
     True if any item except Item_sum contains a field. Set during parsing.
   */
@@ -1180,7 +1184,7 @@ public:
   void print_item_w_name(String *, enum_query_type query_type);
   void print_value(String *);
   virtual void update_used_tables() {}
-  virtual COND *build_equal_items(THD *thd, COND_EQUAL *inherited,
+  virtual COND *build_equal_items(THD *thd, COND_EQUAL *inheited,
                                   bool link_item_fields,
                                   COND_EQUAL **cond_equal_ref)
   {
@@ -1216,10 +1220,11 @@ public:
   {
     return false;
   }
-  virtual void split_sum_func(THD *thd, Item **ref_pointer_array,
+  virtual void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
                               List<Item> &fields, uint flags) {}
   /* Called for items that really have to be split */
-  void split_sum_func2(THD *thd, Item **ref_pointer_array, List<Item> &fields,
+  void split_sum_func2(THD *thd, Ref_ptr_array ref_pointer_array,
+                       List<Item> &fields,
                        Item **ref, uint flags);
   virtual bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate);
   bool get_time(MYSQL_TIME *ltime)
@@ -4768,17 +4773,10 @@ public:
    - cmp() method that compares the saved value with the current value of the
      source item, and if they were not equal saves item's value into the saved
      value.
-*/
 
-/*
-  Cached_item_XXX objects are not exactly caches. They do the following:
-
-  Each Cached_item_XXX object has
-   - its source item
-   - saved value of the source item
-   - cmp() method that compares the saved value with the current value of the
-     source item, and if they were not equal saves item's value into the saved
-     value.
+  TODO: add here:
+   - a way to save the new value w/o comparison
+   - a way to do less/equal/greater comparison
 */
 
 class Cached_item :public Sql_alloc
@@ -4786,48 +4784,75 @@ class Cached_item :public Sql_alloc
 public:
   bool null_value;
   Cached_item() :null_value(0) {}
+  /*
+    Compare the cached value with the source value. If not equal, copy
+    the source value to the cache.
+    @return
+      true  - Not equal
+      false - Equal
+  */
   virtual bool cmp(void)=0;
+
+  /* Compare the cached value with the source value, without copying */
+  virtual int  cmp_read_only()=0;
+
   virtual ~Cached_item(); /*line -e1509 */
 };
 
-class Cached_item_str :public Cached_item
+class Cached_item_item : public Cached_item
+{
+protected:
+  Item *item;                   // source item read by cmp()
+
+  Cached_item_item(Item *arg) : item(arg) {}
+public:
+  void fetch_value_from(Item *new_item)
+  {
+    Item *save= item;
+    item= new_item;             // temporarily make new_item the source
+    cmp();                      // return value is ignored here
+    item= save;                 // put the original source item back
+  }
+};
+
+class Cached_item_str :public Cached_item_item
+{
   uint32 value_max_length;
   String value,tmp_value;
 public:
   Cached_item_str(THD *thd, Item *arg);
   bool cmp(void);
+  int  cmp_read_only();
   ~Cached_item_str();                           // Deallocate String:s
 };
 
 
-class Cached_item_real :public Cached_item
+class Cached_item_real :public Cached_item_item
 {
-  Item *item;
   double value;
 public:
-  Cached_item_real(Item *item_par) :item(item_par),value(0.0) {}
+  Cached_item_real(Item *item_par) :Cached_item_item(item_par),value(0.0) {}
   bool cmp(void);
+  int  cmp_read_only();
 };
 
-class Cached_item_int :public Cached_item
+class Cached_item_int :public Cached_item_item
 {
-  Item *item;
   longlong value;
 public:
-  Cached_item_int(Item *item_par) :item(item_par),value(0) {}
+  Cached_item_int(Item *item_par) :Cached_item_item(item_par),value(0) {}
   bool cmp(void);
+  int  cmp_read_only();
 };
 
 
-class Cached_item_decimal :public Cached_item
+class Cached_item_decimal :public Cached_item_item
 {
-  Item *item;
   my_decimal value;
 public:
   Cached_item_decimal(Item *item_par);
   bool cmp(void);
+  int  cmp_read_only();
 };
 
 class Cached_item_field :public Cached_item
@@ -4844,6 +4869,7 @@ public:
     buff= (uchar*) thd_calloc(thd, length= field->pack_length());
   }
   bool cmp(void);
+  int  cmp_read_only();
 };
 
 class Item_default_value : public Item_field
@@ -5129,7 +5155,7 @@ public:
     return (this->*processor)(arg);
   }
   virtual Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs);
-  void split_sum_func2_example(THD *thd, Item **ref_pointer_array,
+  void split_sum_func2_example(THD *thd,  Ref_ptr_array ref_pointer_array,
                                List<Item> &fields, uint flags)
   {
     example->split_sum_func2(thd, ref_pointer_array, fields, &example, flags);
diff --git a/sql/item_buff.cc b/sql/item_buff.cc
index 62c2f76dc2e..488eb52fb77 100644
--- a/sql/item_buff.cc
+++ b/sql/item_buff.cc
@@ -71,7 +71,7 @@ Cached_item::~Cached_item() {}
 */
 
 Cached_item_str::Cached_item_str(THD *thd, Item *arg)
-  :item(arg),
+  :Cached_item_item(arg),
    value_max_length(MY_MIN(arg->max_length, thd->variables.max_sort_length)),
    value(value_max_length)
 {}
@@ -98,6 +98,25 @@ bool Cached_item_str::cmp(void)
   return tmp;
 }
 
+
+int Cached_item_str::cmp_read_only()
+{
+  /*
+    Read-only comparison of the cached string with the item's current
+    value: the cached value is left untouched.  A NULL on either side is
+    resolved before the strings themselves are compared.
+  */
+  String *current= item->val_str(&tmp_value);
+  const bool source_is_null= item->null_value;
+
+  if (null_value)
+    return source_is_null ? 0 : -1;
+  if (source_is_null)
+    return 1;
+  return sortcmp(&value, current, item->collation.collation);
+}
+
+
 Cached_item_str::~Cached_item_str()
 {
   item=0;					// Safety
@@ -115,6 +134,23 @@ bool Cached_item_real::cmp(void)
   return FALSE;
 }
 
+
+int Cached_item_real::cmp_read_only()
+{
+  /* Compare cached double with the item's current one; cache unchanged. */
+  const double current= item->val_real();
+  const bool source_is_null= item->null_value;
+
+  if (null_value)
+    return source_is_null ? 0 : -1;
+  if (source_is_null)
+    return 1;
+  if (current == value)
+    return 0;
+  return (current < value) ? 1 : -1;
+}
+
+
 bool Cached_item_int::cmp(void)
 {
   longlong nr=item->val_int();
@@ -128,6 +164,22 @@ bool Cached_item_int::cmp(void)
 }
 
 
+int Cached_item_int::cmp_read_only()
+{
+  /* Compare cached integer with the item's current one; cache unchanged. */
+  const longlong current= item->val_int();
+  const bool source_is_null= item->null_value;
+
+  if (null_value)
+    return source_is_null ? 0 : -1;
+  if (source_is_null)
+    return 1;
+  if (current == value)
+    return 0;
+  return (current < value) ? 1 : -1;
+}
+
+
 bool Cached_item_field::cmp(void)
 {
   bool tmp= FALSE;                              // Value is identical
@@ -148,8 +200,24 @@ bool Cached_item_field::cmp(void)
 }
 
 
+int Cached_item_field::cmp_read_only()
+{
+  if (null_value)
+  {
+    if (field->is_null())
+      return 0;
+    else
+      return -1;
+  }
+  if (field->is_null())
+    return 1;
+  /* NOTE(review): operand order vs. sibling cmp_read_only()s - confirm sign */
+  return field->cmp(buff);
+}
+
+
 Cached_item_decimal::Cached_item_decimal(Item *it)
-  :item(it)
+  :Cached_item_item(it)
 {
   my_decimal_set_zero(&value);
 }
@@ -174,3 +242,20 @@ bool Cached_item_decimal::cmp()
   return FALSE;
 }
 
+
+int Cached_item_decimal::cmp_read_only()
+{
+  /* Compare cached decimal with the item's current one; cache unchanged. */
+  my_decimal scratch;
+  my_decimal *current= item->val_decimal(&scratch);
+  const bool source_is_null= item->null_value;
+
+  /* NULL cases are decided first, so a NULL result is never compared. */
+  if (null_value)
+    return source_is_null ? 0 : -1;
+  if (source_is_null)
+    return 1;
+
+  return my_decimal_cmp(&value, current);
+}
+
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 01dcfb39e0e..187d2820531 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -2482,7 +2482,7 @@ bool Item_func_if::date_op(MYSQL_TIME *ltime, uint fuzzydate)
 }
 
 
-void Item_func_nullif::split_sum_func(THD *thd, Item **ref_pointer_array,
+void Item_func_nullif::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
                                       List<Item> &fields, uint flags)
 {
   if (m_cache)
@@ -4791,7 +4791,7 @@ void Item_cond::traverse_cond(Cond_traverser traverser,
     that have or refer (HAVING) to a SUM expression.
 */
 
-void Item_cond::split_sum_func(THD *thd, Item **ref_pointer_array,
+void Item_cond::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
                                List<Item> &fields, uint flags)
 {
   List_iterator<Item> li(list);
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 2d197a86d9b..5789186dbe8 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -1030,8 +1030,8 @@ public:
   uint decimal_precision() const { return args[2]->decimal_precision(); }
   const char *func_name() const { return "nullif"; }
   void print(String *str, enum_query_type query_type);
-  void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields,
-                      uint flags);
+  void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, 
+                      List<Item> &fields, uint flags);
   void update_used_tables();
   table_map not_null_tables() const { return 0; }
   bool is_null();
@@ -2055,8 +2055,8 @@ public:
                       SARGABLE_PARAM **sargables);
   SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr);
   virtual void print(String *str, enum_query_type query_type);
-  void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields,
-                      uint flags);
+  void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
+                      List<Item> &fields, uint flags);
   friend int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
                          COND **conds);
   void top_level_item() { abort_on_null=1; }
@@ -2083,7 +2083,7 @@ template <template<class> class LI, class T> class Item_equal_iterator;
   All equality predicates of the form field1=field2 contained in a
   conjunction are substituted for a sequence of items of this class.
   An item of this class Item_equal(f1,f2,...fk) represents a
-  multiple equality f1=f2=...=fk.
+  multiple equality f1=f2=...=fk.
 
   If a conjunction contains predicates f1=f2 and f2=f3, a new item of
   this class is created Item_equal(f1,f2,f3) representing the multiple
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 50b6f4a6b68..57bd004cf88 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -132,6 +132,7 @@ void Item_func::sync_with_sum_func_and_with_field(List<Item> &list)
   while ((item= li++))
   {
     with_sum_func|= item->with_sum_func;
+    with_window_func|= item->with_window_func;
     with_field|= item->with_field;
   }
 }
@@ -226,6 +227,7 @@ Item_func::fix_fields(THD *thd, Item **ref)
 	maybe_null=1;
 
       with_sum_func= with_sum_func || item->with_sum_func;
+      with_window_func= with_window_func || item->with_window_func;
       with_field= with_field || item->with_field;
       used_tables_and_const_cache_join(item);
       with_subselect|=        item->has_subquery();
@@ -431,7 +433,7 @@ void Item_args::propagate_equal_fields(THD *thd,
   See comments in Item_cond::split_sum_func()
 */
 
-void Item_func::split_sum_func(THD *thd, Item **ref_pointer_array,
+void Item_func::split_sum_func(THD *thd,  Ref_ptr_array ref_pointer_array,
                                List<Item> &fields, uint flags)
 {
   Item **arg, **arg_end;
@@ -4904,9 +4906,16 @@ Item_func_set_user_var::update_hash(void *ptr, uint length,
     If we set a variable explicitely to NULL then keep the old
     result type of the variable
   */
-  if ((null_value= args[0]->null_value) && null_item)
+  if (args[0]->type() == Item::FIELD_ITEM)
+  {
+    /* args[0]->null_value may be outdated */
+    null_value= ((Item_field*)args[0])->field->is_null();
+  }
+  else
+    null_value= args[0]->null_value;
+  if (null_value && null_item)
     res_type= m_var_entry->type;                 // Don't change type of item
-  if (::update_hash(m_var_entry, (null_value= args[0]->null_value),
+  if (::update_hash(m_var_entry, null_value,
                     ptr, length, res_type, cs, unsigned_arg))
   {
     null_value= 1;
diff --git a/sql/item_func.h b/sql/item_func.h
index 2ce199b3565..5c21535adbe 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -151,8 +151,8 @@ public:
     sync_with_sum_func_and_with_field(list);
     list.empty();                                     // Fields are used
   }
-  void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields,
-                      uint flags);
+  void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
+                      List<Item> &fields, uint flags);
   virtual void print(String *str, enum_query_type query_type);
   void print_op(String *str, enum_query_type query_type);
   void print_args(String *str, uint from, enum_query_type query_type);
diff --git a/sql/item_row.cc b/sql/item_row.cc
index b1575b81087..56d73f7b759 100644
--- a/sql/item_row.cc
+++ b/sql/item_row.cc
@@ -100,7 +100,7 @@ void Item_row::cleanup()
 }
 
 
-void Item_row::split_sum_func(THD *thd, Item **ref_pointer_array,
+void Item_row::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
                               List<Item> &fields, uint flags)
 {
   Item **arg, **arg_end;
diff --git a/sql/item_row.h b/sql/item_row.h
index 153a6f085b3..ddb6f0835f2 100644
--- a/sql/item_row.h
+++ b/sql/item_row.h
@@ -85,8 +85,8 @@ public:
   bool fix_fields(THD *thd, Item **ref);
   void fix_after_pullout(st_select_lex *new_parent, Item **ref);
   void cleanup();
-  void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields,
-                      uint flags);
+  void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
+                      List<Item> &fields, uint flags);
   table_map used_tables() const { return used_tables_cache; };
   bool const_item() const { return const_item_cache; };
   enum Item_result result_type() const { return ROW_RESULT; }
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 8745baa8c69..94e7bc98618 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -739,7 +739,7 @@ int Item_in_subselect::optimize(double *out_rows, double *cost)
   }
   
   /* Now with grouping */
-  if (join->group_list)
+  if (join->group_list_for_estimates)
   {
     DBUG_PRINT("info",("Materialized join has grouping, trying to estimate it"));
     double output_rows= get_post_group_estimate(join, *out_rows);
@@ -1896,7 +1896,8 @@ bool Item_allany_subselect::transform_into_max_min(JOIN *join)
         (ALL && (> || =>)) || (ANY && (< || =<))
         for ALL condition is inverted
       */
-      item= new (thd->mem_root) Item_sum_max(thd, *select_lex->ref_pointer_array);
+      item= new (thd->mem_root) Item_sum_max(thd,
+                                             select_lex->ref_pointer_array[0]);
     }
     else
     {
@@ -1904,11 +1905,12 @@ bool Item_allany_subselect::transform_into_max_min(JOIN *join)
         (ALL && (< || =<)) || (ANY && (> || =>))
         for ALL condition is inverted
       */
-      item= new (thd->mem_root) Item_sum_min(thd, *select_lex->ref_pointer_array);
+      item= new (thd->mem_root) Item_sum_min(thd,
+                                             select_lex->ref_pointer_array[0]);
     }
     if (upper_item)
       upper_item->set_sum_test(item);
-    thd->change_item_tree(select_lex->ref_pointer_array, item);
+    thd->change_item_tree(&select_lex->ref_pointer_array[0], item);
     {
       List_iterator<Item> it(select_lex->item_list);
       it++;
@@ -2054,8 +2056,8 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN *join,
                                                       thd,
                                                       &select_lex->context,
                                                       this,
-                                                      select_lex->
-                                                      ref_pointer_array,
+                                                      &select_lex->
+                                                      ref_pointer_array[0],  
                                                       (char *)"<ref>",
                                                       this->full_name()));
     if (!abort_on_null && left_expr->maybe_null)
@@ -2130,7 +2132,7 @@ Item_in_subselect::create_single_in_to_exists_cond(JOIN *join,
                        new (thd->mem_root) Item_ref_null_helper(thd,
                                                   &select_lex->context,
                                                   this,
-                                                  select_lex->ref_pointer_array,
+                                                  &select_lex->ref_pointer_array[0],
                                                   (char *)"<no matter>",
                                                   (char *)"<result>"));
         if (!abort_on_null && left_expr->maybe_null)
@@ -2317,7 +2319,7 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
                                      (char *)in_left_expr_name),
                      new (thd->mem_root)
                      Item_ref(thd, &select_lex->context,
-                              select_lex->ref_pointer_array + i,
+                              &select_lex->ref_pointer_array[i],
                               (char *)"<no matter>",
                               (char *)"<list ref>"));
       Item *item_isnull=
@@ -2325,7 +2327,7 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
         Item_func_isnull(thd,
                          new (thd->mem_root)
                          Item_ref(thd, &select_lex->context,
-                                  select_lex->ref_pointer_array+i,
+                                  &select_lex->ref_pointer_array[i],
                                   (char *)"<no matter>",
                                   (char *)"<list ref>"));
       Item *col_item= new (thd->mem_root)
@@ -2343,8 +2345,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
         Item_is_not_null_test(thd, this,
                               new (thd->mem_root)
                               Item_ref(thd, &select_lex->context,
-                                       select_lex->
-                                       ref_pointer_array + i,
+                                       &select_lex->
+                                       ref_pointer_array[i],
                                        (char *)"<no matter>",
                                        (char *)"<list ref>"));
       if (!abort_on_null && left_expr->element_index(i)->maybe_null)
@@ -2382,8 +2384,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
                                      (char *)in_left_expr_name),
                      new (thd->mem_root)
                      Item_direct_ref(thd, &select_lex->context,
-                                     select_lex->
-                                     ref_pointer_array+i,
+                                     &select_lex->
+                                     ref_pointer_array[i],
                                      (char *)"<no matter>",
                                      (char *)"<list ref>"));
       if (!abort_on_null && select_lex->ref_pointer_array[i]->maybe_null)
@@ -2393,7 +2395,7 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
           Item_is_not_null_test(thd, this,
                                 new (thd->mem_root)
                                 Item_ref(thd, &select_lex->context, 
-                                         select_lex->ref_pointer_array + i,
+                                         &select_lex->ref_pointer_array[i],
                                          (char *)"<no matter>",
                                          (char *)"<list ref>"));
         
@@ -2402,8 +2404,8 @@ Item_in_subselect::create_row_in_to_exists_cond(JOIN * join,
           Item_func_isnull(thd,
                            new (thd->mem_root)
                            Item_direct_ref(thd, &select_lex->context,
-                                           select_lex->
-                                           ref_pointer_array+i,
+                                           &select_lex->
+                                           ref_pointer_array[i],
                                            (char *)"<no matter>",
                                            (char *)"<list ref>"));
         item= new (thd->mem_root) Item_cond_or(thd, item, item_isnull);
@@ -3533,8 +3535,7 @@ int subselect_single_select_engine::prepare()
   prepared= 1;
   SELECT_LEX *save_select= thd->lex->current_select;
   thd->lex->current_select= select_lex;
-  if (join->prepare(&select_lex->ref_pointer_array,
-		    select_lex->table_list.first,
+  if (join->prepare(select_lex->table_list.first,
 		    select_lex->with_wild,
 		    select_lex->where,
 		    select_lex->order_list.elements +
@@ -3683,14 +3684,6 @@ int subselect_single_select_engine::exec()
         */
         select_lex->uncacheable|= UNCACHEABLE_EXPLAIN;
         select_lex->master_unit()->uncacheable|= UNCACHEABLE_EXPLAIN;
-        /*
-          Force join->join_tmp creation, because this subquery will be replaced
-          by a simple select from the materialization temp table by optimize()
-          called by EXPLAIN and we need to preserve the initial query structure
-          so we can display it.
-        */
-        if (join->need_tmp && join->init_save_join_tab())
-          DBUG_RETURN(1);                        /* purecov: inspected */
       }
     }
     if (item->engine_changed(this))
@@ -5231,7 +5224,7 @@ double get_post_group_estimate(JOIN* join, double join_op_rows)
   table_map tables_in_group_list= table_map(0);
 
   /* Find out which tables are used in GROUP BY list */
-  for (ORDER *order= join->group_list; order; order= order->next)
+  for (ORDER *order= join->group_list_for_estimates; order; order= order->next)
   {
     Item *item= order->item[0];
     table_map item_used_tables= item->used_tables();
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 0c85cf53e18..f774ee5a561 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -1243,7 +1243,8 @@ Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table)
 Item_sum_sum::Item_sum_sum(THD *thd, Item_sum_sum *item) 
   :Item_sum_num(thd, item),
    Type_handler_hybrid_field_type(item),
-   curr_dec_buff(item->curr_dec_buff)
+   curr_dec_buff(item->curr_dec_buff),
+   count(item->count)
 {
   /* TODO: check if the following assignments are really needed */
   if (Item_sum_sum::result_type() == DECIMAL_RESULT)
@@ -1265,6 +1266,7 @@ void Item_sum_sum::clear()
 {
   DBUG_ENTER("Item_sum_sum::clear");
   null_value=1;
+  count= 0;
   if (Item_sum_sum::result_type() == DECIMAL_RESULT)
   {
     curr_dec_buff= 0;
@@ -1318,25 +1320,57 @@ void Item_sum_sum::fix_length_and_dec()
 bool Item_sum_sum::add()
 {
   DBUG_ENTER("Item_sum_sum::add");
+  add_helper(false);
+  DBUG_RETURN(0);
+}
+
+void Item_sum_sum::add_helper(bool perform_removal)
+{
+  DBUG_ENTER("Item_sum_sum::add_helper");
+
   if (Item_sum_sum::result_type() == DECIMAL_RESULT)
   {
     my_decimal value;
     const my_decimal *val= aggr->arg_val_decimal(&value);
     if (!aggr->arg_is_null(true))
     {
-      my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1),
-                     val, dec_buffs + curr_dec_buff);
+      if (perform_removal)
+      {
+        DBUG_ASSERT(count > 0);
+        my_decimal_sub(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff ^ 1),
+                       dec_buffs + curr_dec_buff, val);
+        count--;
+      }
+      else
+      {
+        count++;
+        my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff ^ 1),
+                       val, dec_buffs + curr_dec_buff);
+      }
       curr_dec_buff^= 1;
-      null_value= 0;
+      null_value= (count > 0) ? 0 : 1;
     }
   }
   else
   {
-    sum+= aggr->arg_val_real();
+    if (perform_removal)
+      sum-= aggr->arg_val_real();
+    else
+      sum+= aggr->arg_val_real();
     if (!aggr->arg_is_null(true))
-      null_value= 0;
+    {
+      if (perform_removal)
+      {
+        DBUG_ASSERT(count > 0);
+        count--;
+      }
+      else
+        count++;
+
+      null_value= (count > 0) ? 0 : 1;
+    }
   }
-  DBUG_RETURN(0);
+  DBUG_VOID_RETURN;
 }
 
 
@@ -1386,6 +1420,13 @@ my_decimal *Item_sum_sum::val_decimal(my_decimal *val)
   return val_decimal_from_real(val);
 }
 
+void Item_sum_sum::remove()
+{
+  DBUG_ENTER("Item_sum_sum::remove");
+  add_helper(true);
+  DBUG_VOID_RETURN;
+}
+
 /**
   Aggregate a distinct row from the distinct hash table.
  
@@ -1531,6 +1572,19 @@ bool Item_sum_count::add()
   return 0;
 }
 
+
+/*
+  Remove a row. This is used by window functions.
+*/
+
+void Item_sum_count::remove()
+{
+  DBUG_ASSERT(aggr->Aggrtype() == Aggregator::SIMPLE_AGGREGATOR);
+  if (aggr->arg_is_null(false))
+    return;
+  count--;
+}
+
 longlong Item_sum_count::val_int()
 {
   DBUG_ASSERT(fixed == 1);
@@ -1626,6 +1680,16 @@ bool Item_sum_avg::add()
   return FALSE;
 }
 
+void Item_sum_avg::remove()
+{
+  Item_sum_sum::remove();
+  if (!aggr->arg_is_null(true))
+  {
+    DBUG_ASSERT(count > 0);
+    count--;
+  }
+}
+
 double Item_sum_avg::val_real()
 {
   DBUG_ASSERT(fixed == 1);
@@ -2086,6 +2150,8 @@ longlong Item_sum_bit::val_int()
 void Item_sum_bit::clear()
 {
   bits= reset_bits;
+  if (as_window_function)
+    clear_as_window();
 }
 
 Item *Item_sum_or::copy_or_same(THD* thd)
@@ -2093,15 +2159,79 @@ Item *Item_sum_or::copy_or_same(THD* thd)
   return new (thd->mem_root) Item_sum_or(thd, this);
 }
 
+bool Item_sum_bit::clear_as_window()
+{
+  memset(bit_counters, 0, sizeof(bit_counters));
+  num_values_added= 0;
+  set_bits_from_counters();
+  return 0;
+}
+
+bool Item_sum_bit::remove_as_window(ulonglong value)
+{
+  DBUG_ASSERT(as_window_function);
+  for (int i= 0; i < NUM_BIT_COUNTERS; i++)
+  {
+    if (!bit_counters[i])
+    {
+      // Don't attempt to remove values that were never added.
+      DBUG_ASSERT((value & (1ULL << i)) == 0);
+      continue;
+    }
+    bit_counters[i]-= (value & (1ULL << i)) ? 1 : 0;  // 64-bit mask, i <= 63
+  }
+  DBUG_ASSERT(num_values_added > 0);
+  // Saturate at 0: unsigned 0 - 1 wraps to the maximum, so min() keeps 0.
+  num_values_added = std::min(num_values_added, num_values_added - 1);
+  set_bits_from_counters();
+  return 0;
+}
+
+bool Item_sum_bit::add_as_window(ulonglong value)
+{
+  DBUG_ASSERT(as_window_function);
+  for (int i= 0; i < NUM_BIT_COUNTERS; i++)
+  {
+    bit_counters[i]+= (value & (1ULL << i)) ? 1 : 0;  // 64-bit mask, i <= 63
+  }
+  // Saturate at the maximum: if + 1 wraps to 0, max() keeps the old value.
+  num_values_added = std::max(num_values_added, num_values_added + 1);
+  set_bits_from_counters();
+  return 0;
+}
+
+void Item_sum_or::set_bits_from_counters()
+{
+  ulonglong value= 0;
+  for (int i= 0; i < NUM_BIT_COUNTERS; i++)
+  {
+    value|= bit_counters[i] > 0 ? (1ULL << i) : 0;  // 64-bit mask, i <= 63
+  }
+  bits= value | reset_bits;
+}
 
 bool Item_sum_or::add()
 {
   ulonglong value= (ulonglong) args[0]->val_int();
   if (!args[0]->null_value)
+  {
+    if (as_window_function)
+      return add_as_window(value);
     bits|=value;
+  }
   return 0;
 }
 
+void Item_sum_xor::set_bits_from_counters()
+{
+  ulonglong value= 0;
+  for (int i= 0; i < NUM_BIT_COUNTERS; i++)
+  {
+    value|= (bit_counters[i] % 2) ? (1ULL << i) : 0;  // odd count sets bit i
+  }
+  bits= value ^ reset_bits;
+}
+
 Item *Item_sum_xor::copy_or_same(THD* thd)
 {
   return new (thd->mem_root) Item_sum_xor(thd, this);
@@ -2112,10 +2242,31 @@ bool Item_sum_xor::add()
 {
   ulonglong value= (ulonglong) args[0]->val_int();
   if (!args[0]->null_value)
+  {
+    if (as_window_function)
+      return add_as_window(value);
     bits^=value;
+  }
   return 0;
 }
 
+void Item_sum_and::set_bits_from_counters()
+{
+  ulonglong value= 0;
+  if (!num_values_added)
+  {
+    bits= reset_bits;  // No values counted: fall back to the reset value.
+    return;
+  }
+
+  for (int i= 0; i < NUM_BIT_COUNTERS; i++)
+  {
+    // Every added value had bit i set. 1ULL keeps the mask 64-bit (i <= 63).
+    if (bit_counters[i] == num_values_added)
+      value|= (1ULL << i);
+  }
+  bits= value & reset_bits;
+}
 Item *Item_sum_and::copy_or_same(THD* thd)
 {
   return new (thd->mem_root) Item_sum_and(thd, this);
@@ -2126,7 +2277,11 @@ bool Item_sum_and::add()
 {
   ulonglong value= (ulonglong) args[0]->val_int();
   if (!args[0]->null_value)
+  {
+    if (as_window_function)
+      return add_as_window(value);
     bits&=value;
+  }
   return 0;
 }
 
@@ -2314,6 +2469,10 @@ void Item_sum_bit::reset_field()
 
 void Item_sum_bit::update_field()
 {
+  // We never call update_field when computing the function as a window
+  // function. Setting bits to a random value invalidates the bits counters and
+  // the result of the bit function becomes erroneous.
+  DBUG_ASSERT(!as_window_function);
   uchar *res=result_field->ptr;
   bits= uint8korr(res);
   add();
@@ -2896,7 +3055,7 @@ int group_concat_key_cmp_with_order(void* arg, const void* key1,
                   field->table->s->null_bytes);
     int res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset);
     if (res)
-      return (*order_item)->asc ? res : -res;
+      return ((*order_item)->direction == ORDER::ORDER_ASC) ? res : -res;
   }
   /*
     We can't return 0 because in that case the tree class would remove this
@@ -3372,8 +3531,8 @@ bool Item_func_group_concat::setup(THD *thd)
     if (!ref_pointer_array)
       DBUG_RETURN(TRUE);
     memcpy(ref_pointer_array, args, arg_count * sizeof(Item*));
-    if (setup_order(thd, ref_pointer_array, context->table_list, list,
-                    all_fields, *order))
+    if (setup_order(thd, Ref_ptr_array(ref_pointer_array, n_elems),
+                    context->table_list, list,  all_fields, *order))
       DBUG_RETURN(TRUE);
   }
 
@@ -3507,9 +3666,9 @@ void Item_func_group_concat::print(String *str, enum_query_type query_type)
       if (i)
         str->append(',');
       orig_args[i + arg_count_field]->print(str, query_type);
-      if (order[i]->asc)
+      if (order[i]->direction == ORDER::ORDER_ASC)
         str->append(STRING_WITH_LEN(" ASC"));
-      else
+     else
         str->append(STRING_WITH_LEN(" DESC"));
     }
   }
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 811e9d5c59c..cfe2d3db878 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -109,6 +109,7 @@ public:
 
 
 class st_select_lex;
+class Window_spec;
 
 /**
   Class Item_sum is the base class used for special expressions that SQL calls
@@ -347,7 +348,9 @@ public:
   enum Sumfunctype
   { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
     AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
-    VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC
+    VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC,
+    ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC,
+    CUME_DIST_FUNC
   };
 
   Item **ref_by; /* pointer to a ref to the object used to register it */
@@ -540,12 +543,16 @@ public:
   virtual void clear()= 0;
   virtual bool add()= 0;
   virtual bool setup(THD *thd) { return false; }
+  
+  virtual void remove() { DBUG_ASSERT(0); }
 
   virtual void cleanup();
   bool check_vcol_func_processor(uchar *int_arg) 
   {
     return trace_unsupported_by_check_vcol_func_processor(func_name()); 
   }
+  
+  virtual void setup_window_func(THD *thd, Window_spec *window_spec) {}
 };
 
 
@@ -710,6 +717,7 @@ public:
 class Item_sum_int :public Item_sum_num
 {
 public:
+  Item_sum_int(THD *thd): Item_sum_num(thd) {}
   Item_sum_int(THD *thd, Item *item_par): Item_sum_num(thd, item_par) {}
   Item_sum_int(THD *thd, List<Item> &list): Item_sum_num(thd, list) {}
   Item_sum_int(THD *thd, Item_sum_int *item) :Item_sum_num(thd, item) {}
@@ -724,7 +732,7 @@ public:
 
 
 class Item_sum_sum :public Item_sum_num,
-                    public Type_handler_hybrid_field_type
+                   public Type_handler_hybrid_field_type 
 {
 protected:
   double sum;
@@ -763,6 +771,11 @@ public:
     return has_with_distinct() ? "sum(distinct " : "sum("; 
   }
   Item *copy_or_same(THD* thd);
+  void remove();
+
+private:
+  void add_helper(bool perform_removal);
+  ulonglong count;
 };
 
 
@@ -775,6 +788,7 @@ class Item_sum_count :public Item_sum_int
   void clear();
   bool add();
   void cleanup();
+  void remove();
 
   public:
   Item_sum_count(THD *thd, Item *item_par):
@@ -821,6 +835,8 @@ class Item_sum_count :public Item_sum_int
 class Item_sum_avg :public Item_sum_sum
 {
 public:
+  // TODO-cvicentiu given that Item_sum_sum now uses a counter of its own, in
+  // order to implement remove(), it is possible to remove this member.
   ulonglong count;
   uint prec_increment;
   uint f_precision, f_scale, dec_bin_size;
@@ -839,6 +855,7 @@ public:
   }
   void clear();
   bool add();
+  void remove();
   double val_real();
   // In SPs we might force the "wrong" type with select into a declare variable
   longlong val_int() { return val_int_from_real(); }
@@ -1019,14 +1036,18 @@ public:
 
 class Item_sum_bit :public Item_sum_int
 {
-protected:
-  ulonglong reset_bits,bits;
-
 public:
   Item_sum_bit(THD *thd, Item *item_par, ulonglong reset_arg):
-    Item_sum_int(thd, item_par), reset_bits(reset_arg), bits(reset_arg) {}
+    Item_sum_int(thd, item_par), reset_bits(reset_arg), bits(reset_arg),
+    as_window_function(FALSE), num_values_added(0) {}
   Item_sum_bit(THD *thd, Item_sum_bit *item):
-    Item_sum_int(thd, item), reset_bits(item->reset_bits), bits(item->bits) {}
+    Item_sum_int(thd, item), reset_bits(item->reset_bits), bits(item->bits),
+    as_window_function(item->as_window_function),
+    num_values_added(item->num_values_added)
+  {
+    if (as_window_function)
+      memcpy(bit_counters, item->bit_counters, sizeof(bit_counters));
+  }
   enum Sumfunctype sum_func () const {return SUM_BIT_FUNC;}
   void clear();
   longlong val_int();
@@ -1037,8 +1058,42 @@ public:
   void cleanup()
   {
     bits= reset_bits;
+    if (as_window_function)
+      clear_as_window();
     Item_sum_int::cleanup();
   }
+  void setup_window_func(THD *thd __attribute__((unused)),
+                         Window_spec *window_spec __attribute__((unused)))
+  {
+    as_window_function= TRUE;
+    clear_as_window();
+  }
+  void remove()
+  {
+    if (as_window_function)
+    {
+      remove_as_window(args[0]->val_int());
+      return;
+    }
+    // Unless we're counting bits, we can not remove anything.
+    DBUG_ASSERT(0);
+  }
+
+protected:
+  static const int NUM_BIT_COUNTERS= 64;
+  ulonglong reset_bits,bits;
+  /*
+    Marks whether the function is to be computed as a window function.
+  */
+  bool as_window_function;
+  // When used as an aggregate window function, we need to store
+  // this additional information.
+  ulonglong num_values_added;
+  ulonglong bit_counters[NUM_BIT_COUNTERS];
+  bool add_as_window(ulonglong value);
+  bool remove_as_window(ulonglong value);
+  bool clear_as_window();
+  virtual void set_bits_from_counters()= 0;
 };
 
 
@@ -1050,28 +1105,37 @@ public:
   bool add();
   const char *func_name() const { return "bit_or("; }
   Item *copy_or_same(THD* thd);
+
+private:
+  void set_bits_from_counters();
 };
 
 
 class Item_sum_and :public Item_sum_bit
 {
-  public:
+public:
   Item_sum_and(THD *thd, Item *item_par):
     Item_sum_bit(thd, item_par, ULONGLONG_MAX) {}
   Item_sum_and(THD *thd, Item_sum_and *item) :Item_sum_bit(thd, item) {}
   bool add();
   const char *func_name() const { return "bit_and("; }
   Item *copy_or_same(THD* thd);
+
+private:
+  void set_bits_from_counters();
 };
 
 class Item_sum_xor :public Item_sum_bit
 {
-  public:
+public:
   Item_sum_xor(THD *thd, Item *item_par): Item_sum_bit(thd, item_par, 0) {}
   Item_sum_xor(THD *thd, Item_sum_xor *item) :Item_sum_bit(thd, item) {}
   bool add();
   const char *func_name() const { return "bit_xor("; }
   Item *copy_or_same(THD* thd);
+
+private:
+  void set_bits_from_counters();
 };
 
 
diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc
new file mode 100644
index 00000000000..c8f398577b5
--- /dev/null
+++ b/sql/item_windowfunc.cc
@@ -0,0 +1,239 @@
+#include "item_windowfunc.h" 
+#include "my_dbug.h"
+#include "my_global.h"
+#include "sql_select.h" // test if group changed
+
+
+bool
+Item_window_func::resolve_window_name(THD *thd)
+{
+  if (window_spec)
+  {
+    /* The window name has been already resolved */
+    return false;
+  }
+  DBUG_ASSERT(window_name != NULL && window_spec == NULL);
+  char *ref_name= window_name->str;
+
+  /* !TODO: Add the code to resolve ref_name in outer queries */ 
+  /* 
+    First look for the definition of the window with 'window_name'
+    in the current select
+  */
+  List<Window_spec> curr_window_specs= 
+    List<Window_spec> (thd->lex->current_select->window_specs);
+  List_iterator_fast<Window_spec> it(curr_window_specs);
+  Window_spec *win_spec;
+  while((win_spec= it++))
+  {
+    char *win_spec_name= win_spec->name();
+    if (win_spec_name &&
+        my_strcasecmp(system_charset_info, ref_name, win_spec_name) == 0)
+    {
+      window_spec= win_spec;
+      break;
+    }
+  }
+
+  if (!window_spec)
+  {
+    my_error(ER_WRONG_WINDOW_SPEC_NAME, MYF(0), ref_name);
+    return true;
+  }
+
+  return false;                      
+}
+
+
+void
+Item_window_func::update_used_tables()
+{
+  used_tables_cache= 0;
+  window_func()->update_used_tables();
+  used_tables_cache|= window_func()->used_tables();
+  for (ORDER *ord= window_spec->partition_list->first; ord; ord=ord->next)
+  {
+    Item *item= *ord->item;
+    item->update_used_tables();
+    used_tables_cache|= item->used_tables();
+  }
+  for (ORDER *ord= window_spec->order_list->first; ord; ord=ord->next)
+  {
+    Item *item= *ord->item;
+    item->update_used_tables();
+    used_tables_cache|= item->used_tables();
+  }  
+}
+
+
+bool
+Item_window_func::fix_fields(THD *thd, Item **ref)
+{
+  DBUG_ASSERT(fixed == 0);
+
+  enum_parsing_place place= thd->lex->current_select->parsing_place;
+
+  if (!(place == SELECT_LIST || place == IN_ORDER_BY))
+  {
+    my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
+    return true;
+  }
+
+  if (window_name && resolve_window_name(thd))
+    return true;
+  
+  if (window_spec->window_frame && is_frame_prohibited())
+  {
+    my_error(ER_NOT_ALLOWED_WINDOW_FRAME, MYF(0), window_func()->func_name());
+    return true;
+  }
+
+  if (window_spec->order_list->elements == 0 && is_order_list_mandatory())
+  {
+    my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name());
+    return true;
+  }
+  /*
+    TODO: why the last parameter is 'ref' in this call? What if window_func
+    decides to substitute itself for something else and does *ref=.... ? 
+    This will substitute *this (an Item_window_func object) with Item_sum
+    object. Is this the intent?
+  */
+  if (window_func()->fix_fields(thd, ref))
+    return true;
+
+  const_item_cache= false;
+  with_window_func= true;
+  with_sum_func= false;
+
+  fix_length_and_dec();
+
+  max_length= window_func()->max_length;
+  maybe_null= window_func()->maybe_null;
+
+  fixed= 1;
+  set_phase_to_initial();
+  return false;
+}
+
+
+/*
+  @detail
+    Window function evaluates its arguments when it is scanning the temporary
+    table in partition/order-by order. That is, arguments should be read from
+    the temporary table, not from the original base columns.
+
+    In order for this to work, we need to call "split_sum_func" for each
+    argument. The effect of the call is:
+     1. the argument is added into ref_pointer_array. This will cause the
+        argument to be saved in the temp.table
+     2. argument item is replaced with an Item_ref object. this object refers
+        to the argument through the ref_pointer_array.
+
+    then, change_to_use_tmp_fields() will replace ref_pointer_array with an
+    array that points to the temp.table fields.
+    This way, when window_func attempts to evaluate its arguments, it will use
+    Item_ref objects which will read data from the temp.table.
+
+    Note: Before window functions, aggregate functions never needed to do such
+    transformations on their arguments. This is because grouping operation
+    does not need to read from the temp.table.
+    (Q: what happens when we first sort and then do grouping in a
+      group-after-group mode? don't group by items read from temp.table, then?)
+*/
+
+void Item_window_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
+                                      List<Item> &fields, uint flags)
+{
+  for (uint i=0; i < window_func()->argument_count(); i++)
+  {
+    Item **p_item= &window_func()->arguments()[i];
+    (*p_item)->split_sum_func2(thd, ref_pointer_array, fields, p_item, flags);
+  }
+}
+
+
+/*
+  This must be called before advance_window() can be called.
+
+  @detail
+    If we attempt to do it in fix_fields(), partition_fields will refer
+    to the original window function arguments.
+    We need it to refer to temp.table columns.
+*/
+
+void Item_window_func::setup_partition_border_check(THD *thd)
+{
+  partition_tracker.init(thd, window_spec->partition_list);
+  window_func()->setup_window_func(thd, window_spec);
+}
+
+
+void Item_sum_rank::setup_window_func(THD *thd, Window_spec *window_spec)
+{
+  /* TODO: move this into Item_window_func? */
+  peer_tracker.init(thd, window_spec->order_list);
+  clear();
+}
+
+void Item_sum_dense_rank::setup_window_func(THD *thd, Window_spec *window_spec)
+{
+  /* TODO: consider moving this && Item_sum_rank's implementation */
+  peer_tracker.init(thd, window_spec->order_list);
+  clear();
+}
+
+bool Item_sum_dense_rank::add()
+{
+  if (peer_tracker.check_if_next_group())
+    dense_rank++;
+
+  return false;
+}
+
+
+bool Item_sum_rank::add()
+{
+  row_number++;
+  if (peer_tracker.check_if_next_group())
+  {
+    /* Row value changed */
+    cur_rank= row_number;
+  }
+  return false; 
+}
+
+bool Item_window_func::check_if_partition_changed()
+{
+  return partition_tracker.check_if_next_group();
+}
+
+void Item_window_func::advance_window()
+{
+  if (check_if_partition_changed())
+  {
+    /* Next partition */
+    window_func()->clear();
+  }
+  window_func()->add();
+}
+
+bool Item_sum_percent_rank::add()
+{
+  row_number++;
+  if (peer_tracker.check_if_next_group())
+  {
+    /* Row value changed. */
+    cur_rank= row_number;
+  }
+  return false;
+}
+
+void Item_sum_percent_rank::setup_window_func(THD *thd, Window_spec *window_spec)
+{
+  /* TODO: move this into Item_window_func? */
+  peer_tracker.init(thd, window_spec->order_list);
+  clear();
+}
+
+
diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h
new file mode 100644
index 00000000000..9695698bbd9
--- /dev/null
+++ b/sql/item_windowfunc.h
@@ -0,0 +1,693 @@
+#ifndef ITEM_WINDOWFUNC_INCLUDED
+#define ITEM_WINDOWFUNC_INCLUDED
+
+#include "my_global.h"
+#include "item.h"
+
+class Window_spec;
+
+
+int test_if_group_changed(List<Cached_item> &list);
+
+/* A wrapper around test_if_group_changed */
+class Group_bound_tracker
+{
+  List<Cached_item> group_fields;
+public:
+  void init(THD *thd, SQL_I_List<ORDER> *list)
+  {
+    for (ORDER *curr = list->first; curr; curr=curr->next) 
+    {
+      Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
+      group_fields.push_back(tmp);
+    }
+  }
+
+  void cleanup()
+  {
+    group_fields.empty();
+  }
+
+  /*
+    Check if the current row is in a different group than the previous row
+    this function was called for.
+    The new row's group becomes the current row's group.
+  */
+  bool check_if_next_group()
+  {
+    if (test_if_group_changed(group_fields) > -1)
+      return true;
+    return false;
+  }
+
+  int compare_with_cache()
+  {
+    List_iterator<Cached_item> li(group_fields);
+    Cached_item *ptr;
+    int res;
+    while ((ptr= li++))
+    {
+      if ((res= ptr->cmp_read_only()))
+        return res;
+    }
+    return 0;
+  }
+};
+
+/*
+  ROW_NUMBER() OVER (...)
+
+  @detail
+  - This is a Window function (not just an aggregate)
+  - It can be computed by doing one pass over select output, provided 
+    the output is sorted according to the window definition.
+*/
+
+class Item_sum_row_number: public Item_sum_int
+{
+  longlong count;
+
+public:
+  void clear()
+  {
+    count= 0;
+  }
+  bool add() 
+  {
+    count++;
+    return false; 
+  }
+  void update_field() {}
+
+  Item_sum_row_number(THD *thd)
+    : Item_sum_int(thd),  count(0) {}
+
+  enum Sumfunctype sum_func() const
+  {
+    return ROW_NUMBER_FUNC;
+  }
+
+  longlong val_int()
+  {
+    return count;
+  }
+  const char*func_name() const
+  {
+    return "row_number";
+  }
+  
+};
+
+
+/*
+  RANK() OVER (...) Windowing function
+
+  @detail
+  - This is a Window function (not just an aggregate)
+  - It can be computed by doing one pass over select output, provided 
+    the output is sorted according to the window definition.
+
+  The function is defined as:
+
+  "The rank of row R is defined as 1 (one) plus the number of rows that 
+  precede R and are not peers of R"
+
+  "This implies that if two or more rows are not distinct with respect to 
+  the window ordering, then there will be one or more"
+*/
+
+class Item_sum_rank: public Item_sum_int
+{
+protected:
+  longlong row_number; // just ROW_NUMBER()
+  longlong cur_rank;   // current value
+  
+  Group_bound_tracker peer_tracker;
+public:
+  void clear()
+  {
+    /* This is called on partition start */
+    cur_rank= 1;
+    row_number= 0;
+  }
+
+  bool add();
+
+  longlong val_int()
+  {
+    return cur_rank;
+  }
+
+  void update_field() {}
+  /*
+   void reset_field();
+    TODO: ^^ what does this do ? It is not called ever?
+  */
+
+public:
+  Item_sum_rank(THD *thd)
+    : Item_sum_int(thd) {}
+
+  enum Sumfunctype sum_func () const
+  {
+    return RANK_FUNC;
+  }
+
+  const char*func_name() const
+  {
+    return "rank";
+  }
+
+  void setup_window_func(THD *thd, Window_spec *window_spec);
+  void cleanup()
+  {
+    peer_tracker.cleanup();
+    Item_sum_int::cleanup();
+  }
+};
+
+
+/*
+  DENSE_RANK() OVER (...) Windowing function
+
+  @detail
+  - This is a Window function (not just an aggregate)
+  - It can be computed by doing one pass over select output, provided 
+    the output is sorted according to the window definition.
+
+  The function is defined as:
+
+  "If DENSE_RANK is specified, then the rank of row R is defined as the 
+  number of rows preceding and including R that are distinct with respect 
+  to the window ordering"
+
+  "This implies that there are no gaps in the sequential rank numbering of
+  rows in each window partition."
+*/
+
+
+class Item_sum_dense_rank: public Item_sum_int
+{
+  longlong dense_rank;
+  Group_bound_tracker peer_tracker;
+  /*
+     XXX(cvicentiu) This class could potentially be implemented in the rank
+     class, with a switch for the DENSE case.
+  */
+  void clear()
+  {
+    dense_rank= 1;
+  }
+  bool add();
+  void update_field() {}
+  longlong val_int()
+  {
+    return dense_rank;
+  }
+
+ public:
+  Item_sum_dense_rank(THD *thd)
+    : Item_sum_int(thd), dense_rank(0) {}
+  enum Sumfunctype sum_func () const
+  {
+    return DENSE_RANK_FUNC;
+  }
+
+  const char*func_name() const
+  {
+    return "dense_rank";
+  }
+
+  void setup_window_func(THD *thd, Window_spec *window_spec);
+
+  void cleanup()
+  {
+    peer_tracker.cleanup();
+    Item_sum_int::cleanup();
+  }
+};
+
+/* TODO-cvicentiu
+ * Perhaps this is overengineering, but I would like to decouple the 2-pass
+ * algorithm from the specific action that must be performed during the
+ * first pass. The second pass can make use of the "add" function from the
+ * Item_sum_<window_function>.
+ */
+
+/*
+   This class represents a generic interface for window functions that need
+   to store additional information. Such window functions include percent_rank
+   and cume_dist.
+*/
+class Window_context
+{
+ public:
+  virtual void add_field_to_context(Field* field) = 0;
+  virtual void reset() = 0;
+  virtual ~Window_context() {};
+};
+
+/*
+   A generic interface that specifies the datatype that the context represents.
+*/
+template <typename T>
+class Window_context_getter
+{
+ protected:
+  virtual T get_field_context(const Field* field) = 0;
+  virtual ~Window_context_getter() {};
+};
+
+/*
+   A window function context representing the number of rows that are present
+   within a partition. Because the number of rows is not dependent on the
+   specific value within the current field, we ignore the parameter
+   in this case.
+*/
+class Window_context_row_count :
+  public Window_context, Window_context_getter<ulonglong>
+{
+ public:
+  Window_context_row_count() : num_rows_(0) {};
+
+  void add_field_to_context(Field* field __attribute__((unused)))
+  {
+    num_rows_++;
+  }
+
+  void reset()
+  {
+    num_rows_= 0;
+  }
+
+  ulonglong get_field_context(const Field* field __attribute__((unused)))
+  {
+    return num_rows_;
+  }
+ private:
+  ulonglong num_rows_;
+};
+
+class Window_context_row_and_group_count :
+  public Window_context, Window_context_getter<std::pair<ulonglong, ulonglong> >
+{
+ public:
+  Window_context_row_and_group_count(void * group_list) {}
+};
+
+/*
+  An abstract class representing an item that holds a context.
+*/
+class Item_context
+{
+ public:
+  Item_context() : context_(NULL) {}
+  Window_context* get_window_context() { return context_; }
+
+  virtual bool create_window_context() = 0;
+  virtual void delete_window_context() = 0;
+
+ protected:
+  Window_context* context_;
+};
+
+/*
+  A base window function (aggregate) that also holds a context.
+
+  NOTE: All two pass window functions need to implement
+  this interface.
+*/
+class Item_sum_window_with_context : public Item_sum_num,
+                                     public Item_context
+{
+ public:
+  Item_sum_window_with_context(THD *thd)
+   : Item_sum_num(thd), Item_context() {}
+};
+
+/*
+  @detail
+  "The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is 
+  defined to be the RANK of R and NR is defined to be the number of rows in
+  the window partition of R."
+
+  Computation of this function requires two passes:
+  - First pass to find #rows in the partition
+    This is held within the row_count context.
+  - Second pass to compute rank of current row and the value of the function
+*/
+class Item_sum_percent_rank: public Item_sum_window_with_context,
+                             public Window_context_row_count
+{
+ public:
+  Item_sum_percent_rank(THD *thd)
+    : Item_sum_window_with_context(thd), cur_rank(1), row_number(0) {}
+
+  longlong val_int()
+  {
+   /*
+      Percent rank is a real value so calling the integer value should never
+      happen. It makes no sense as it gets truncated to either 0 or 1.
+   */
+    DBUG_ASSERT(0);
+    return 0;
+  }
+
+  double val_real()
+  {
+   /*
+     We can not get the real value without knowing the number of rows
+     in the partition. Don't divide by 0.
+   */
+   if (!get_context_())
+   {
+     // Calling this kind of function without a context makes no sense.
+     DBUG_ASSERT(0);
+     return 0;
+   }
+
+   longlong partition_rows = get_context_()->get_field_context(result_field);
+   return partition_rows > 1 ?  // a partition of 0 or 1 rows yields 0
+             static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
+  }
+
+  enum Sumfunctype sum_func () const
+  {
+    return PERCENT_RANK_FUNC;
+  }
+
+  const char*func_name() const
+  {
+    return "percent_rank";
+  }
+
+  bool create_window_context()  // returns true when the allocation failed
+  {
+    // TODO-cvicentiu: Currently this means we must make sure to delete
+    // the window context. We can potentially allocate this on the THD memroot.
+    // At the same time, this is only necessary for a small portion of the
+    // query execution and it does not make sense to keep it for all of it.
+    context_ = new Window_context_row_count();
+    if (context_ == NULL)
+      return true;
+    return false;
+  }
+
+  void delete_window_context()  // frees the context and resets the pointer
+  {
+    if (context_)
+      delete get_context_();
+    context_ = NULL;
+  }
+
+  void update_field() {}  // deliberately a no-op
+
+  void clear()  // back to the state of a fresh partition: rank 1, no rows
+  {
+    cur_rank= 1;
+    row_number= 0;
+  }
+  bool add();
+  enum Item_result result_type () const { return REAL_RESULT; }
+  enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
+
+  void fix_length_and_dec()
+  {
+    decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
+                    // requires.
+  }
+
+  void setup_window_func(THD *thd, Window_spec *window_spec);
+
+ private:
+  longlong cur_rank;   // Current rank of the current row.
+  longlong row_number; // Value if this were ROW_NUMBER() function.
+
+  Group_bound_tracker peer_tracker;
+
+  void cleanup()  // releases the peer tracker, then runs the base cleanup
+  {
+    peer_tracker.cleanup();
+    Item_sum_window_with_context::cleanup();
+  }
+
+  /* Helper function so that we don't cast the context every time. */
+  Window_context_row_count* get_context_()
+  {
+    return static_cast<Window_context_row_count *>(context_);
+  }
+};
+
+
+
+
+/*
+  @detail
+  "The relative rank of a row R is defined as NP/NR, where
+  - NP is defined to be the number of rows preceding or peer with R in the
+    window ordering of the window partition of R
+  - NR is defined to be the number of rows in the window partition of R."
+
+  Just like with Item_sum_percent_rank, computation of this function requires
+  two passes.
+*/
+
+class Item_sum_cume_dist: public Item_sum_percent_rank
+{
+ public:
+  Item_sum_cume_dist(THD *thd)
+    : Item_sum_percent_rank(thd) {}
+
+  double val_real() { return 0; }  // NOTE(review): stub, always yields 0
+
+  enum Sumfunctype sum_func () const  // identifies this item as CUME_DIST
+  {
+    return CUME_DIST_FUNC;
+  }
+
+  const char*func_name() const  // fixed name string for this function
+  {
+    return "cume_dist";
+  }
+};
+
+
+class Item_window_func : public Item_func_or_sum
+{
+  /* Window function parameters as we've got them from the parser */
+public:
+  LEX_STRING *window_name;  // given to the name-taking constructor, else NULL
+public:
+  Window_spec *window_spec; // given to the spec-taking constructor, else NULL
+  
+  /*
+    This stores the data about the partition we're currently in.
+    advance_window() uses this to tell when we've left one partition and
+    entered another
+  */
+  Group_bound_tracker partition_tracker;
+public:
+  Item_window_func(THD *thd, Item_sum *win_func, LEX_STRING *win_name)
+    : Item_func_or_sum(thd, (Item *) win_func),
+      window_name(win_name), window_spec(NULL), 
+      force_return_blank(true),
+      read_value_from_result_field(false) {}
+
+  Item_window_func(THD *thd, Item_sum *win_func, Window_spec *win_spec)
+    : Item_func_or_sum(thd, (Item *) win_func), 
+      window_name(NULL), window_spec(win_spec), 
+      force_return_blank(true),
+      read_value_from_result_field(false) {}
+
+  Item_sum *window_func() { return (Item_sum *) args[0]; } // wrapped function
+
+  void update_used_tables();
+
+  bool is_frame_prohibited()  // true only for the five functions listed here
+  {
+    switch (window_func()->sum_func()) {
+    case Item_sum::ROW_NUMBER_FUNC:
+    case Item_sum::RANK_FUNC:
+    case Item_sum::DENSE_RANK_FUNC:
+    case Item_sum::PERCENT_RANK_FUNC:
+    case Item_sum::CUME_DIST_FUNC:
+      return true;
+    default: 
+      return false;
+    }
+  }
+
+  bool is_order_list_mandatory()  // same list as above minus ROW_NUMBER
+  {
+    switch (window_func()->sum_func()) {
+    case Item_sum::RANK_FUNC:
+    case Item_sum::DENSE_RANK_FUNC:
+    case Item_sum::PERCENT_RANK_FUNC:
+    case Item_sum::CUME_DIST_FUNC:
+      return true;
+    default: 
+      return false;
+    }
+  }  
+
+  /*
+    Computation functions.
+    TODO: consider merging these with class Group_bound_tracker.
+  */
+  void setup_partition_border_check(THD *thd);
+
+  void advance_window();
+  bool check_if_partition_changed();
+
+  enum_field_types field_type() const  // delegates to the wrapped function
+  { 
+    return ((Item_sum *) args[0])->field_type(); 
+  }
+  enum Item::Type type() const { return Item::WINDOW_FUNC_ITEM; }
+
+private:
+  /* 
+    Window functions are very special functions, so val_() methods have
+    special meaning for them:
+
+    - Phase#1, "Initial" we run the join and put its result into temporary
+      table. For window functions, we write a blank non-NULL value (0 or
+      an empty string) as a placeholder; force_return_blank= true.
+      
+    - Phase#2: "Computation": executor does the scan in {PARTITION, ORDER BY}
+      order of this window function. It calls appropriate methods to inform
+      the window function about rows entering/leaving the window.
+      It calls window_func()->val_int() so that current window function value
+      can be saved and stored in the temp.table. Both flags are false here.
+
+    - Phase#3: "Retrieval" the temporary table is read and passed to query 
+      output. However, Item_window_func still remains in the select list,
+      so item_windowfunc->val_int() will be called.
+      During Phase#3, read_value_from_result_field= true.
+  */
+  bool force_return_blank;            // true only in Phase#1
+  bool read_value_from_result_field;  // true only in Phase#3
+
+public:
+  void set_phase_to_initial()  // Phase#1: val_*() return blanks
+  {
+    force_return_blank= true;
+    read_value_from_result_field= false;
+  }
+  void set_phase_to_computation()  // Phase#2: val_*() ask window_func()
+  {
+    force_return_blank= false;
+    read_value_from_result_field= false;
+  }
+  void set_phase_to_retrieval()  // Phase#3: val_*() read result_field
+  {
+    force_return_blank= false;
+    read_value_from_result_field= true;
+  }
+
+  double val_real()  // value source depends on the current phase flags
+  {
+    double res;
+    if (force_return_blank)
+    {
+      res= 0.0;
+      null_value= false;
+    }
+    else if (read_value_from_result_field)
+    {
+      res= result_field->val_real();
+      null_value= result_field->is_null();
+    }
+    else
+    {
+      res= window_func()->val_real();
+      null_value= window_func()->null_value;
+    }
+    return res;
+  }
+
+  longlong val_int()  // value source depends on the current phase flags
+  {
+    longlong res;
+    if (force_return_blank)
+    {
+      res= 0;
+      null_value= false;
+    }
+    else if (read_value_from_result_field)
+    {
+      res= result_field->val_int();
+      null_value= result_field->is_null();
+    }
+    else
+    {
+      res= window_func()->val_int();
+      null_value= window_func()->null_value;
+    }
+    return res;
+  }
+
+  String* val_str(String* str)  // blank phase yields an empty, non-NULL str
+  {
+    String *res;
+    if (force_return_blank)
+    {
+      null_value= false;
+      str->length(0);
+      res= str;
+    }
+    else if (read_value_from_result_field)
+    {
+      if ((null_value= result_field->is_null()))
+        res= NULL;
+      else
+        res= result_field->val_str(str);
+    }
+    else
+    {
+      res= window_func()->val_str(str);
+      null_value= window_func()->null_value;
+    }
+    return res;
+  }
+
+  my_decimal* val_decimal(my_decimal* dec)  // blank phase yields decimal zero
+  {
+    my_decimal *res;
+    if (force_return_blank)
+    {
+      my_decimal_set_zero(dec);
+      null_value= false;
+      res= dec;
+    }
+    else if (read_value_from_result_field)
+    {
+      if ((null_value= result_field->is_null()))
+        res= NULL;
+      else
+        res= result_field->val_decimal(dec);
+    }
+    else
+    {
+      res= window_func()->val_decimal(dec);
+      null_value= window_func()->null_value;
+    }
+    return res;
+  }
+
+  void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
+                              List<Item> &fields, uint flags);
+  void fix_length_and_dec()  // only decimals is copied from the wrapped item
+  {
+    decimals = window_func()->decimals;
+  }
+
+  const char* func_name() const { return "WF"; }
+
+  bool fix_fields(THD *thd, Item **ref);
+
+  bool resolve_window_name(THD *thd);
+
+};
+
+#endif /* ITEM_WINDOWFUNC_INCLUDED */
diff --git a/sql/lex.h b/sql/lex.h
index da5fa2de137..01e73f5f3d3 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -221,6 +221,7 @@ static SYMBOL symbols[] = {
   { "EVERY",		SYM(EVERY_SYM)},
   { "EXAMINED",         SYM(EXAMINED_SYM)},
   { "EXCHANGE",         SYM(EXCHANGE_SYM)},
+  { "EXCLUDE",          SYM(EXCLUDE_SYM)},
   { "EXECUTE",		SYM(EXECUTE_SYM)},
   { "EXISTS",		SYM(EXISTS)},
   { "EXIT",             SYM(EXIT_SYM)},
@@ -241,6 +242,7 @@ static SYMBOL symbols[] = {
   { "FLOAT4",		SYM(FLOAT_SYM)},
   { "FLOAT8",		SYM(DOUBLE_SYM)},
   { "FLUSH",		SYM(FLUSH_SYM)},
+  { "FOLLOWING",        SYM(FOLLOWING_SYM)},
   { "FOR",		SYM(FOR_SYM)},
   { "FORCE",		SYM(FORCE_SYM)},
   { "FOREIGN",		SYM(FOREIGN)},
@@ -425,9 +427,11 @@ static SYMBOL symbols[] = {
   { "OPTIONALLY",	SYM(OPTIONALLY)},
   { "OR",		SYM(OR_SYM)},
   { "ORDER",		SYM(ORDER_SYM)},
+  { "OTHERS",           SYM(OTHERS_SYM)},
   { "OUT",              SYM(OUT_SYM)},
   { "OUTER",		SYM(OUTER)},
   { "OUTFILE",		SYM(OUTFILE)},
+  { "OVER",             SYM(OVER_SYM)},
   { "OWNER",		SYM(OWNER_SYM)},
   { "PACK_KEYS",	SYM(PACK_KEYS_SYM)},
   { "PAGE",	        SYM(PAGE_SYM)},
@@ -446,6 +450,7 @@ static SYMBOL symbols[] = {
   { "POINT",		SYM(POINT_SYM)},
   { "POLYGON",		SYM(POLYGON)},
   { "PORT",		SYM(PORT_SYM)},
+  { "PRECEDING",        SYM(PRECEDING_SYM)},
   { "PRECISION",	SYM(PRECISION)},
   { "PREPARE",          SYM(PREPARE_SYM)},
   { "PRESERVE",		SYM(PRESERVE_SYM)},
@@ -602,6 +607,7 @@ static SYMBOL symbols[] = {
   { "TEXT",		SYM(TEXT_SYM)},
   { "THAN",             SYM(THAN_SYM)},
   { "THEN",		SYM(THEN_SYM)},
+  { "TIES",             SYM(TIES_SYM)},
   { "TIME",		SYM(TIME_SYM)},
   { "TIMESTAMP",	SYM(TIMESTAMP)},
   { "TIMESTAMPADD",     SYM(TIMESTAMP_ADD)},
@@ -619,6 +625,7 @@ static SYMBOL symbols[] = {
   { "TRUNCATE",		SYM(TRUNCATE_SYM)},
   { "TYPE",		SYM(TYPE_SYM)},
   { "TYPES",		SYM(TYPES_SYM)},
+  { "UNBOUNDED",        SYM(UNBOUNDED_SYM)},
   { "UNCOMMITTED",	SYM(UNCOMMITTED_SYM)},
   { "UNDEFINED",	SYM(UNDEFINED_SYM)},
   { "UNDO_BUFFER_SIZE",	SYM(UNDO_BUFFER_SIZE_SYM)},
@@ -660,6 +667,7 @@ static SYMBOL symbols[] = {
   { "WHEN",		SYM(WHEN_SYM)},
   { "WHERE",		SYM(WHERE)},
   { "WHILE",            SYM(WHILE_SYM)},
+  { "WINDOW",           SYM(WINDOW_SYM)}, 
   { "WITH",		SYM(WITH)},
   { "WORK",		SYM(WORK_SYM)},
   { "WRAPPER",		SYM(WRAPPER_SYM)},
@@ -682,10 +690,12 @@ static SYMBOL sql_functions[] = {
   { "BIT_XOR",		SYM(BIT_XOR)},
   { "CAST",		SYM(CAST_SYM)},
   { "COUNT",		SYM(COUNT_SYM)},
+  { "CUME_DIST",        SYM(CUME_DIST_SYM)},
   { "CURDATE",		SYM(CURDATE)},
   { "CURTIME",		SYM(CURTIME)},
   { "DATE_ADD",		SYM(DATE_ADD_INTERVAL)},
   { "DATE_SUB",		SYM(DATE_SUB_INTERVAL)},
+  { "DENSE_RANK",       SYM(DENSE_RANK_SYM)},
   { "EXTRACT",		SYM(EXTRACT_SYM)},
   { "GROUP_CONCAT",	SYM(GROUP_CONCAT_SYM)},
   { "MAX",		SYM(MAX_SYM)},
@@ -693,6 +703,9 @@ static SYMBOL sql_functions[] = {
   { "MIN",		SYM(MIN_SYM)},
   { "NOW",		SYM(NOW_SYM)},
   { "POSITION",		SYM(POSITION_SYM)},
+  { "PERCENT_RANK",     SYM(PERCENT_RANK_SYM)},
+  { "RANK",             SYM(RANK_SYM)},
+  { "ROW_NUMBER",       SYM(ROW_NUMBER_SYM)},
   { "SESSION_USER",     SYM(USER)},
   { "STD",		SYM(STD_SYM)},
   { "STDDEV",		SYM(STD_SYM)},
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 7169a3eda81..af027ad6fdc 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -1533,7 +1533,7 @@ end:
   if (!head->no_keyread)
   {
     doing_key_read= 1;
-    head->enable_keyread();
+    head->set_keyread(true);
   }
 
   head->prepare_for_position();
@@ -10643,7 +10643,7 @@ int read_keys_and_merge_scans(THD *thd,
   if (!head->key_read)
   {
     enabled_keyread= 1;
-    head->enable_keyread();
+    head->set_keyread(true);
   }
   head->prepare_for_position();
 
@@ -10736,7 +10736,7 @@ int read_keys_and_merge_scans(THD *thd,
     index merge currently doesn't support "using index" at all
   */
   if (enabled_keyread)
-    head->disable_keyread();
+    head->set_keyread(false);
   if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0,
                        &unique->sort, 1 , 1, TRUE))
     result= 1;
@@ -10744,7 +10744,7 @@ int read_keys_and_merge_scans(THD *thd,
 
 err:
   if (enabled_keyread)
-    head->disable_keyread();
+    head->set_keyread(false);
   DBUG_RETURN(1);
 }
 
@@ -12071,9 +12071,6 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
     DBUG_RETURN(NULL); /* Cannot execute with correlated conditions. */
 
   /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
-  if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
-    DBUG_RETURN(NULL);
-
   List_iterator<Item> select_items_it(join->fields_list);
   is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
 
@@ -13456,7 +13453,7 @@ QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
   {
     DBUG_ASSERT(file == head->file);
     if (doing_key_read)
-      head->disable_keyread();
+      head->set_keyread(false);
     /*
       There may be a code path when the same table was first accessed by index,
       then the index is closed, and the table is scanned (order by + loose scan).
@@ -13649,7 +13646,7 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
   if (!head->key_read)
   {
     doing_key_read= 1;
-    head->enable_keyread(); /* We need only the key attributes */
+    head->set_keyread(true); /* We need only the key attributes */
   }
   if ((result= file->ha_index_init(index,1)))
   {
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index e53e2a9ee0d..9140ed11828 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -445,7 +445,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
 static bool convert_subq_to_jtbm(JOIN *parent_join, 
                                  Item_in_subselect *subq_pred, bool *remove);
 static TABLE_LIST *alloc_join_nest(THD *thd);
-static uint get_tmp_table_rec_length(Item **p_list, uint elements);
+static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements);
 static double get_tmp_table_lookup_cost(THD *thd, double row_count,
                                         uint row_size);
 static double get_tmp_table_write_cost(THD *thd, double row_count,
@@ -2236,13 +2236,9 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
             JOIN_TAB *tab= join->best_positions[i].table;
             join->map2table[tab->table->tablenr]= tab;
           }
-          //List_iterator<Item> it(right_expr_list);
-          Item **ref_array= subq_select->ref_pointer_array;
-          Item **ref_array_end= ref_array + subq_select->item_list.elements; 
           table_map map= 0;
-          //while ((item= it++))
-          for (;ref_array < ref_array_end; ref_array++)
-            map |= (*ref_array)->used_tables();
+          for (uint i=0; i < subq_select->item_list.elements; i++)
+            map|= subq_select->ref_pointer_array[i]->used_tables();
           map= map & ~PSEUDO_TABLE_BITS;
           Table_map_iterator tm_it(map);
           int tableno;
@@ -2305,15 +2301,14 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
     Length of the temptable record, in bytes
 */
 
-static uint get_tmp_table_rec_length(Item **p_items, uint elements)
+static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements)
 {
   uint len= 0;
   Item *item;
   //List_iterator<Item> it(items);
-  Item **p_item;
-  for (p_item= p_items; p_item < p_items + elements ; p_item++)
+  for (uint i= 0; i < elements ; i++)
   {
-    item = *p_item;
+    item = p_items[i];
     switch (item->result_type()) {
     case REAL_RESULT:
       len += sizeof(double);
@@ -3562,13 +3557,10 @@ bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab)
   */
   sjm->sjm_table_param.init();
   sjm->sjm_table_param.bit_fields_as_long= TRUE;
-  //List_iterator<Item> it(item_list);
   SELECT_LEX *subq_select= emb_sj_nest->sj_subq_pred->unit->first_select();
-  Item **p_item= subq_select->ref_pointer_array;
-  Item **p_end= p_item + subq_select->item_list.elements;
-  //while((right_expr= it++))
-  for(;p_item != p_end; p_item++)
-    sjm->sjm_table_cols.push_back(*p_item, thd->mem_root);
+  Ref_ptr_array p_items= subq_select->ref_pointer_array;
+  for (uint i= 0; i < subq_select->item_list.elements; i++)
+    sjm->sjm_table_cols.push_back(p_items[i], thd->mem_root);
   
   sjm->sjm_table_param.field_count= subq_select->item_list.elements;
   sjm->sjm_table_param.force_not_null_cols= TRUE;
@@ -3724,13 +3716,13 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
     */
     sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements];
     //it.rewind();
-    Item **p_item= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array;
+    Ref_ptr_array p_items= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array;
     for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
     {
       bool dummy;
       Item_equal *item_eq;
       //Item *item= (it++)->real_item();
-      Item *item= (*(p_item++))->real_item();
+      Item *item= p_items[i]->real_item();
       DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
       Field *copy_to= ((Item_field*)item)->field;
       /*
@@ -5603,7 +5595,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
     */
     /* C.1 Compute the cost of the materialization strategy. */
     //uint rowlen= get_tmp_table_rec_length(unit->first_select()->item_list);
-    uint rowlen= get_tmp_table_rec_length(ref_pointer_array, 
+    uint rowlen= get_tmp_table_rec_length(ref_ptrs, 
                                           select_lex->item_list.elements);
     /* The cost of writing one row into the temporary table. */
     double write_cost= get_tmp_table_write_cost(thd, inner_record_count_1,
diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc
index f36887eb137..ad3f5aed112 100644
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -406,7 +406,7 @@ int opt_sum_query(THD *thd,
 	  if (!error && reckey_in_range(is_max, &ref, item_field->field, 
 			                conds, range_fl, prefix_len))
 	    error= HA_ERR_KEY_NOT_FOUND;
-          table->disable_keyread();
+          table->set_keyread(false);
           table->file->ha_index_end();
           if (error)
 	  {
@@ -968,7 +968,7 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
             converted (for example to upper case)
           */
           if (field->part_of_key.is_set(idx))
-            table->enable_keyread();
+            table->set_keyread(true);
           DBUG_RETURN(TRUE);
         }
       }
diff --git a/sql/records.cc b/sql/records.cc
index 3995bea6569..e7a4ab836c0 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -39,7 +39,7 @@ int rr_sequential(READ_RECORD *info);
 static int rr_from_tempfile(READ_RECORD *info);
 static int rr_unpack_from_tempfile(READ_RECORD *info);
 static int rr_unpack_from_buffer(READ_RECORD *info);
-static int rr_from_pointers(READ_RECORD *info);
+int rr_from_pointers(READ_RECORD *info);
 static int rr_from_cache(READ_RECORD *info);
 static int init_rr_cache(THD *thd, READ_RECORD *info);
 static int rr_cmp(uchar *a,uchar *b);
@@ -316,7 +316,7 @@ void end_read_record(READ_RECORD *info)
   }
   if (info->table)
   {
-    if (info->table->created)
+    if (info->table->is_created())
       (void) info->table->file->extra(HA_EXTRA_NO_CACHE);
     if (info->read_record != rr_quick) // otherwise quick_range does it
       (void) info->table->file->ha_index_or_rnd_end();
@@ -535,7 +535,7 @@ static int rr_unpack_from_tempfile(READ_RECORD *info)
   return 0;
 }
 
-static int rr_from_pointers(READ_RECORD *info)
+int rr_from_pointers(READ_RECORD *info)
 {
   int tmp;
   uchar *cache_pos;
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index 0d168e937a9..dfd12ec20ac 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -7154,3 +7154,33 @@ ER_WRONG_ORDER_IN_WITH_CLAUSE
         eng "The definition of the table '%s' refers to the table '%s' defined later in a non-recursive WITH clause"
 ER_RECURSIVE_QUERY_IN_WITH_CLAUSE
         eng "Recursive queries in WITH clause are not supported yet"
+ER_WRONG_WINDOW_SPEC_NAME
+        eng "Window specification with name '%s' is not defined"
+ER_DUP_WINDOW_NAME
+        eng "Multiple window specifications with the same name '%s'"
+ER_PARTITION_LIST_IN_REFERENCING_WINDOW_SPEC
+        eng "Window specification referencing another one '%s' cannot contain partition list"
+ER_ORDER_LIST_IN_REFERENCING_WINDOW_SPEC
+        eng "Referenced window specification '%s' already contains order list"
+ER_WINDOW_FRAME_IN_REFERENCED_WINDOW_SPEC
+        eng "Referenced window specification '%s' cannot contain window frame"
+ER_BAD_COMBINATION_OF_WINDOW_FRAME_BOUND_SPECS
+        eng "Unacceptable combination of window frame bound specifications"
+ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION
+        eng "Window function is allowed only in SELECT list and ORDER BY clause"
+ER_WINDOW_FUNCTION_IN_WINDOW_SPEC
+        eng "Window function is not allowed in window specification"
+ER_NOT_ALLOWED_WINDOW_FRAME
+        eng "Window frame is not allowed with '%s'"
+ER_NO_ORDER_LIST_IN_WINDOW_SPEC
+        eng "No order list in window specification for '%s'"
+ER_RANGE_FRAME_NEEDS_SIMPLE_ORDERBY
+        eng "RANGE-type frame requires ORDER BY clause with single sort key"
+ER_WRONG_TYPE_FOR_ROWS_FRAME
+        eng "Integer is required for ROWS-type frame"
+ER_WRONG_TYPE_FOR_RANGE_FRAME
+        eng "Numeric datatype is required for RANGE-type frame"
+ER_FRAME_EXCLUSION_NOT_SUPPORTED
+        eng "Frame exclusion is not supported yet"
+ER_WINDOW_FUNCTION_DONT_HAVE_FRAME
+        eng "This window function may not have a window frame"
diff --git a/sql/sql_analyze_stmt.cc b/sql/sql_analyze_stmt.cc
index d11c93229b0..58f72d6b8de 100644
--- a/sql/sql_analyze_stmt.cc
+++ b/sql/sql_analyze_stmt.cc
@@ -69,75 +69,3 @@ void Filesort_tracker::print_json_members(Json_writer *writer)
   }
 }
 
-
-/* 
-  Report that we are doing a filesort. 
-    @return 
-      Tracker object to be used with filesort
-*/
-
-Filesort_tracker *Sort_and_group_tracker::report_sorting(THD *thd)
-{
-  DBUG_ASSERT(cur_action < MAX_QEP_ACTIONS);
-
-  if (total_actions)
-  {
-    /* This is not the first execution. Check */
-    if (qep_actions[cur_action] != EXPL_ACTION_FILESORT)
-    {
-      varied_executions= true;
-      cur_action++;
-      if (!dummy_fsort_tracker)
-        dummy_fsort_tracker= new (thd->mem_root) Filesort_tracker(is_analyze);
-      return dummy_fsort_tracker;
-    }
-    return qep_actions_data[cur_action++].filesort_tracker;
-  }
-
-  Filesort_tracker *fs_tracker= new(thd->mem_root)Filesort_tracker(is_analyze);
-  qep_actions_data[cur_action].filesort_tracker= fs_tracker;
-  qep_actions[cur_action++]= EXPL_ACTION_FILESORT;
-
-  return fs_tracker;
-}
-
-
-void Sort_and_group_tracker::report_tmp_table(TABLE *tbl)
-{
-  DBUG_ASSERT(cur_action < MAX_QEP_ACTIONS);
-  if (total_actions)
-  {
-    /* This is not the first execution. Check if the steps match.  */
-    // todo: should also check that tmp.table kinds are the same.
-    if (qep_actions[cur_action] != EXPL_ACTION_TEMPTABLE)
-      varied_executions= true;
-  }
-
-  if (!varied_executions)
-  {
-    qep_actions[cur_action]= EXPL_ACTION_TEMPTABLE;
-    // qep_actions_data[cur_action]= ....
-  }
-  
-  cur_action++;
-}
-
-
-void Sort_and_group_tracker::report_duplicate_removal()
-{
-  DBUG_ASSERT(cur_action < MAX_QEP_ACTIONS);
-  if (total_actions)
-  {
-    /* This is not the first execution. Check if the steps match.  */
-    if (qep_actions[cur_action] != EXPL_ACTION_REMOVE_DUPS)
-      varied_executions= true;
-  }
-
-  if (!varied_executions)
-  {
-    qep_actions[cur_action]= EXPL_ACTION_REMOVE_DUPS;
-  }
-
-  cur_action++;
-}
-
diff --git a/sql/sql_analyze_stmt.h b/sql/sql_analyze_stmt.h
index d7634bdfb85..2a08a842dfc 100644
--- a/sql/sql_analyze_stmt.h
+++ b/sql/sql_analyze_stmt.h
@@ -284,174 +284,3 @@ private:
   ulonglong sort_buffer_size;
 };
 
-
-typedef enum 
-{
-  EXPL_NO_TMP_TABLE=0,
-  EXPL_TMP_TABLE_BUFFER,
-  EXPL_TMP_TABLE_GROUP,
-  EXPL_TMP_TABLE_DISTINCT
-} enum_tmp_table_use;
-
-
-typedef enum 
-{
-  EXPL_ACTION_EOF, /* not-an-action */
-  EXPL_ACTION_FILESORT,
-  EXPL_ACTION_TEMPTABLE,
-  EXPL_ACTION_REMOVE_DUPS,
-} enum_qep_action;
-
-
-/*
-  This is to track how a JOIN object has resolved ORDER/GROUP BY/DISTINCT
-  
-  We are not tied to the query plan at all, because query plan does not have 
-  sufficient information. *A lot* of decisions about ordering/grouping are 
-  made at very late stages (in JOIN::exec, JOIN::init_execution, in
-  create_sort_index and even in create_tmp_table).
-
-  The idea is that operations that happen during select execution will report
-  themselves. We have these operations:
-  - Sorting with filesort()
-  - Duplicate row removal (the one done by remove_duplicates()).
-  - Use of temporary table to buffer the result.
-
-  There is also "Selection" operation, done by do_select(). It reads rows,
-  there are several distinct cases:
-   1. doing the join operation on the base tables
-   2. reading the temporary table
-   3. reading the filesort output
-  it would be nice to build execution graph, e.g.
-
-    Select(JOIN op) -> temp.table -> filesort -> Select(filesort result)
-
-  the problem is that there is no way to tell what a do_select() call will do.
-
-  Our solution is not to have explicit selection operations. We make these
-  assumptions about the query plan:
-  - Select(JOIN op) is the first operation in the query plan
-  - Unless the first recorded operation is filesort(). filesort() is unable 
-    read result of a select, so when we find it first, the query plan is:
-
-    filesort(first join table) -> Select(JOIN op) -> ...
-
-  the other popular query plan is:
-
-    Select (JOIN op) -> temp.table -> filesort() -> ...
-
-///TODO: handle repeated execution with subselects!
-*/
-
-class Sort_and_group_tracker : public Sql_alloc
-{
-  enum { MAX_QEP_ACTIONS = 5 };
-
-  /* Query actions in the order they were made. */
-  enum_qep_action qep_actions[MAX_QEP_ACTIONS];
-  
-  /* Number for the next action */
-  int cur_action;
-
-  /*
-    Non-zero means there was already an execution which had
-    #total_actions actions
-  */
-  int total_actions;
-
-  int get_n_actions()
-  {
-    return total_actions? total_actions: cur_action;
-  }
-
-  /*
-    TRUE<=>there were executions which took different sort/buffer/de-duplicate
-    routes. The counter values are not meaningful.
-  */
-  bool varied_executions;
-
-  /* Details about query actions */
-  union 
-  {
-    Filesort_tracker *filesort_tracker;
-    enum_tmp_table_use tmp_table;
-  } 
-  qep_actions_data[MAX_QEP_ACTIONS];
-  
-  Filesort_tracker *dummy_fsort_tracker;
-  bool is_analyze;
-public:
-  Sort_and_group_tracker(bool is_analyze_arg) :
-    cur_action(0), total_actions(0), varied_executions(false),
-    dummy_fsort_tracker(NULL),
-    is_analyze(is_analyze_arg)
-  {}
-
-  /*************** Reporting interface ***************/
-  /* Report that join execution is started */
-  void report_join_start()
-  {
-    if (!total_actions && cur_action != 0)
-    {
-      /* This is a second execution */
-      total_actions= cur_action;
-    }
-    cur_action= 0;
-  }
-
-  /* 
-    Report that a temporary table is created. The next step is to write to the
-    this tmp. table
-  */
-  void report_tmp_table(TABLE *tbl);
-
-  /* 
-    Report that we are doing a filesort. 
-      @return 
-        Tracker object to be used with filesort
-  */
-  Filesort_tracker *report_sorting(THD *thd);
-  
-  /*
-    Report that remove_duplicates() is invoked [on a temp. table].
-    We don't collect any statistics on this operation, yet.
-  */
-  void report_duplicate_removal();
-  
-  friend class Iterator;
-  /*************** Statistics retrieval interface ***************/
-  bool had_varied_executions() { return varied_executions; }
-
-  class Iterator 
-  {
-    Sort_and_group_tracker *owner;
-    int idx;
-  public:
-    Iterator(Sort_and_group_tracker *owner_arg) : 
-      owner(owner_arg), idx(owner_arg->get_n_actions() - 1)
-    {}
-
-    enum_qep_action get_next(Filesort_tracker **tracker/*,
-                             enum_tmp_table_use *tmp_table_use*/)
-    {
-      /* Walk back through the array... */
-      if (idx < 0)
-        return EXPL_ACTION_EOF;
-      switch (owner->qep_actions[idx])
-      {
-        case EXPL_ACTION_FILESORT:
-          *tracker= owner->qep_actions_data[idx].filesort_tracker;
-          break;
-        case EXPL_ACTION_TEMPTABLE:
-          //*tmp_table_use= tmp_table_kind[tmp_table_idx++];
-          break;
-        default:
-          break;
-      }
-      return owner->qep_actions[idx--];
-    }
-
-    bool is_last_element() { return idx == -1; }
-  };
-};
-
diff --git a/sql/sql_array.h b/sql/sql_array.h
index 159951e26a6..7d6854690e7 100644
--- a/sql/sql_array.h
+++ b/sql/sql_array.h
@@ -85,6 +85,15 @@ public:
 
   Element_type *array() const { return m_array; }
 
+  bool operator==(const Bounds_checked_array<Element_type>&rhs) const
+  {
+    return m_size == rhs.m_size && m_array == rhs.m_array; // same view
+  }
+  bool operator!=(const Bounds_checked_array<Element_type>&rhs) const
+  {
+    return !(*this == rhs); // exact negation of operator==
+  }
+
 private:
   Element_type *m_array;
   size_t        m_size;
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 08509a0e2bf..819f89d5ac4 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -7749,11 +7749,13 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
 	       List<Item> *sum_func_list,
 	       uint wild_num)
 {
+  if (!wild_num)
+    return(0);
+
   Item *item;
   List_iterator<Item> it(fields);
   Query_arena *arena, backup;
   DBUG_ENTER("setup_wild");
-  DBUG_ASSERT(wild_num != 0);
 
   /*
     Don't use arena if we are not in prepared statements or stored procedures
@@ -7832,7 +7834,7 @@ int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
 ** Check that all given fields exists and fill struct with current data
 ****************************************************************************/
 
-bool setup_fields(THD *thd, Item **ref_pointer_array,
+bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array,
                   List<Item> &fields, enum_mark_columns mark_used_columns,
                   List<Item> *sum_func_list, bool allow_sum_func)
 {
@@ -7842,7 +7844,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
   List_iterator<Item> it(fields);
   bool save_is_item_list_lookup;
   DBUG_ENTER("setup_fields");
-  DBUG_PRINT("enter", ("ref_pointer_array: %p", ref_pointer_array));
+  DBUG_PRINT("enter", ("ref_pointer_array: %p", ref_pointer_array.array()));
 
   thd->mark_used_columns= mark_used_columns;
   DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
@@ -7864,8 +7866,11 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
     TODO: remove it when (if) we made one list for allfields and
     ref_pointer_array
   */
-  if (ref_pointer_array)
-    bzero(ref_pointer_array, sizeof(Item *) * fields.elements);
+  if (!ref_pointer_array.is_null())
+  {
+    DBUG_ASSERT(ref_pointer_array.size() >= fields.elements);
+    memset(ref_pointer_array.array(), 0, sizeof(Item *) * fields.elements);
+  }
 
   /*
     We call set_entry() there (before fix_fields() of the whole list of field
@@ -7883,7 +7888,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
   while ((var= li++))
     var->set_entry(thd, FALSE);
 
-  Item **ref= ref_pointer_array;
+  Ref_ptr_array ref= ref_pointer_array;
   thd->lex->current_select->cur_pos_in_select_list= 0;
   while ((item= it++))
   {
@@ -7896,12 +7901,20 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
       DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
       DBUG_RETURN(TRUE); /* purecov: inspected */
     }
-    if (ref)
-      *(ref++)= item;
-    if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM &&
-	sum_func_list)
+    if (!ref.is_null())
+    {
+      ref[0]= item;
+      ref.pop_front();
+    }
+    /*
+      split_sum_func() must be called for Window Function items, see
+      Item_window_func::split_sum_func.
+    */
+    if ((item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM &&
+	 sum_func_list) || item->type() == Item::WINDOW_FUNC_ITEM)
       item->split_sum_func(thd, ref_pointer_array, *sum_func_list,
                            SPLIT_SUM_SELECT);
+    thd->lex->current_select->select_list_tables|= item->used_tables();
     thd->lex->used_tables|= item->used_tables();
     thd->lex->current_select->cur_pos_in_select_list++;
   }
@@ -8320,7 +8333,10 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
       views and natural joins this update is performed inside the loop below.
     */
     if (table)
+    {
       thd->lex->used_tables|= table->map;
+      thd->lex->current_select->select_list_tables|= table->map;
+    }
 
     /*
       Initialize a generic field iterator for the current table reference.
@@ -8412,6 +8428,8 @@ insert_fields(THD *thd, Name_resolution_context *context, const char *db_name,
           if (field_table)
           {
             thd->lex->used_tables|= field_table->map;
+            thd->lex->current_select->select_list_tables|=
+              field_table->map;
             field_table->covering_keys.intersect(field->part_of_key);
             field_table->merge_keys.merge(field->part_of_key);
             field_table->used_fields++;
diff --git a/sql/sql_base.h b/sql/sql_base.h
index b6e135b6feb..24a7c7a5a2e 100644
--- a/sql/sql_base.h
+++ b/sql/sql_base.h
@@ -168,7 +168,7 @@ void make_leaves_list(THD *thd, List<TABLE_LIST> &list, TABLE_LIST *tables,
                       bool full_table_list, TABLE_LIST *boundary);
 int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
 	       List<Item> *sum_func_list, uint wild_num);
-bool setup_fields(THD *thd, Item** ref_pointer_array,
+bool setup_fields(THD *thd, Ref_ptr_array ref_pointer_array,
                   List<Item> &item, enum_mark_columns mark_used_columns,
                   List<Item> *sum_func_list, bool allow_sum_func);
 void unfix_fields(List<Item> &items);
@@ -382,7 +382,7 @@ inline TABLE_LIST *find_table_in_local_list(TABLE_LIST *table,
 }
 
 
-inline bool setup_fields_with_no_wrap(THD *thd, Item **ref_pointer_array,
+inline bool setup_fields_with_no_wrap(THD *thd, Ref_ptr_array ref_pointer_array,
                                       List<Item> &item,
                                       enum_mark_columns mark_used_columns,
                                       List<Item> *sum_func_list,
diff --git a/sql/sql_class.h b/sql/sql_class.h
index be652e6dd01..f2b8481ff7f 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -4527,16 +4527,9 @@ inline uint tmp_table_max_key_parts() { return MI_MAX_KEY_SEG; }
 
 class TMP_TABLE_PARAM :public Sql_alloc
 {
-private:
-  /* Prevent use of these (not safe because of lists and copy_field) */
-  TMP_TABLE_PARAM(const TMP_TABLE_PARAM &);
-  void operator=(TMP_TABLE_PARAM &);
-
 public:
   List<Item> copy_funcs;
-  List<Item> save_copy_funcs;
   Copy_field *copy_field, *copy_field_end;
-  Copy_field *save_copy_field, *save_copy_field_end;
   uchar	    *group_buff;
   Item	    **items_to_copy;			/* Fields in tmp table */
   TMP_ENGINE_COLUMNDEF *recinfo, *start_recinfo;
@@ -4571,7 +4564,13 @@ public:
   uint  hidden_field_count;
   uint	group_parts,group_length,group_null_parts;
   uint	quick_group;
-  bool  using_indirect_summary_function;
+  /**
+    Enabled when we have at least one outer_sum_func. Needed when used
+    along with distinct.
+
+    @see create_tmp_table
+  */
+  bool  using_outer_summary_function;
   CHARSET_INFO *table_charset;
   bool schema_table;
   /* TRUE if the temp table is created for subquery materialization. */
@@ -4601,9 +4600,10 @@ public:
   TMP_TABLE_PARAM()
     :copy_field(0), group_parts(0),
      group_length(0), group_null_parts(0),
-    schema_table(0), materialized_subquery(0), force_not_null_cols(0),
-    precomputed_group_by(0),
-    force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
+     using_outer_summary_function(0),
+     schema_table(0), materialized_subquery(0), force_not_null_cols(0),
+     precomputed_group_by(0),
+     force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
   {}
   ~TMP_TABLE_PARAM()
   {
@@ -4615,8 +4615,8 @@ public:
     if (copy_field)				/* Fix for Intel compiler */
     {
       delete [] copy_field;
-      save_copy_field= copy_field= NULL;
-      save_copy_field_end= copy_field_end= NULL;
+      copy_field= NULL;
+      copy_field_end= NULL;
     }
   }
 };
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 42e7f6c3569..0a76ee0a699 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -263,7 +263,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
      my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE");
      DBUG_RETURN(TRUE);
   }
-  if (!(table= table_list->table) || !table->created)
+  if (!(table= table_list->table) || !table->is_created())
   {
       my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
 	       table_list->view_db.str, table_list->view_name.str);
@@ -490,27 +490,30 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
 
   if (query_plan.using_filesort)
   {
-    uint         length= 0;
-    SORT_FIELD  *sortorder;
 
     {
+      Filesort fsort(order, HA_POS_ERROR, select);
       DBUG_ASSERT(query_plan.index == MAX_KEY);
       Filesort_tracker *fs_tracker= 
         thd->lex->explain->get_upd_del_plan()->filesort_tracker;
 
-      if (!(sortorder= make_unireg_sortorder(thd, order, &length, NULL)) ||
-	  !(file_sort= filesort(thd, table, sortorder, length,
-                                select, HA_POS_ERROR,
-                                true,
-                                fs_tracker)))
+      if (!(file_sort= filesort(thd, table, &fsort, true, fs_tracker)))
         goto got_error;
+
       thd->inc_examined_row_count(file_sort->examined_rows);
       /*
         Filesort has already found and selected the rows we want to delete,
         so we don't need the where clause
       */
       delete select;
-      free_underlaid_joins(thd, select_lex);
+
+      /*
+        If we are not in DELETE ... RETURNING, we can free subqueries. (in
+        DELETE ... RETURNING we can't, because the RETURNING part may have
+        a subquery in it)
+      */
+      if (!with_select)
+        free_underlaid_joins(thd, select_lex);
       select= 0;
     }
   }
@@ -737,7 +740,7 @@ got_error:
     wild_num            - number of wildcards used in optional SELECT clause 
     field_list          - list of items in optional SELECT clause
     conds		- conditions
-
+
   RETURN VALUE
     FALSE OK
     TRUE  error
@@ -758,7 +761,8 @@ got_error:
                                     DELETE_ACL, SELECT_ACL, TRUE))
     DBUG_RETURN(TRUE);
   if ((wild_num && setup_wild(thd, table_list, field_list, NULL, wild_num)) ||
-      setup_fields(thd, NULL, field_list, MARK_COLUMNS_READ, NULL, 0) ||
+      setup_fields(thd, Ref_ptr_array(),
+                   field_list, MARK_COLUMNS_READ, NULL, 0) ||
       setup_conds(thd, table_list, select_lex->leaf_tables, conds) ||
       setup_ftfuncs(select_lex))
     DBUG_RETURN(TRUE);
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 1ef83b3bf1f..79e57cded81 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -477,7 +477,7 @@ exit_merge:
 unconditional_materialization:
   derived->change_refs_to_fields();
   derived->set_materialized_derived();
-  if (!derived->table || !derived->table->created)
+  if (!derived->table || !derived->table->is_created())
     res= mysql_derived_create(thd, lex, derived);
   if (!res)
     res= mysql_derived_fill(thd, lex, derived);
@@ -859,7 +859,7 @@ bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived)
   TABLE *table= derived->table;
   SELECT_LEX_UNIT *unit= derived->get_unit();
 
-  if (table->created)
+  if (table->is_created())
     DBUG_RETURN(FALSE);
   select_union *result= (select_union*)unit->result;
   if (table->s->db_type() == TMP_ENGINE_HTON)
@@ -912,7 +912,7 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
   if (unit->executed && !unit->uncacheable && !unit->describe)
     DBUG_RETURN(FALSE);
   /*check that table creation passed without problems. */
-  DBUG_ASSERT(derived->table && derived->table->created);
+  DBUG_ASSERT(derived->table && derived->table->is_created());
   SELECT_LEX *first_select= unit->first_select();
   select_union *derived_result= derived->derived_result;
   SELECT_LEX *save_current_select= lex->current_select;
@@ -928,7 +928,7 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
       first_select->options&= ~OPTION_FOUND_ROWS;
 
     lex->current_select= first_select;
-    res= mysql_select(thd, &first_select->ref_pointer_array,
+    res= mysql_select(thd,
                       first_select->table_list.first,
                       first_select->with_wild,
                       first_select->item_list, first_select->where,
diff --git a/sql/sql_do.cc b/sql/sql_do.cc
index 9e58031f6a4..6d86ece6a9f 100644
--- a/sql/sql_do.cc
+++ b/sql/sql_do.cc
@@ -29,7 +29,7 @@ bool mysql_do(THD *thd, List<Item> &values)
   List_iterator<Item> li(values);
   Item *value;
   DBUG_ENTER("mysql_do");
-  if (setup_fields(thd, 0, values, MARK_COLUMNS_NONE, 0, 0))
+  if (setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_NONE, 0, 0))
     DBUG_RETURN(TRUE);
   while ((value = li++))
     (void) value->is_null();
diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
index dae5127cbf8..ae54f8bd455 100644
--- a/sql/sql_explain.cc
+++ b/sql/sql_explain.cc
@@ -697,13 +697,6 @@ bool Explain_node::print_explain_json_cache(Json_writer *writer,
 }
 
 
-void Explain_select::replace_table(uint idx, Explain_table_access *new_tab)
-{
-  delete join_tabs[idx];
-  join_tabs[idx]= new_tab;
-}
-
-
 Explain_basic_join::~Explain_basic_join()
 {
   if (join_tabs)
@@ -754,35 +747,23 @@ int Explain_select::print_explain(Explain_query *query,
   }
   else
   {
-    bool using_tmp;
-    bool using_fs;
+    bool using_tmp= false;
+    bool using_fs= false;
 
-    if (is_analyze)
+    for (Explain_aggr_node *node= aggr_tree; node; node= node->child)
     {
-      /* 
-        Get the data about "Using temporary; Using filesort" from execution
-        tracking system.
-      */
-      using_tmp= false;
-      using_fs= false;
-      Sort_and_group_tracker::Iterator iter(&ops_tracker);
-      enum_qep_action action;
-      Filesort_tracker *dummy;
-
-      while ((action= iter.get_next(&dummy)) != EXPL_ACTION_EOF)
+      switch (node->get_type())
       {
-        if (action == EXPL_ACTION_FILESORT)
-          using_fs= true;
-        else if (action == EXPL_ACTION_TEMPTABLE)
+        case AGGR_OP_TEMP_TABLE:
           using_tmp= true;
+          break;
+        case AGGR_OP_FILESORT:
+          using_fs= true;
+          break;
+        default:
+          break;
       }
     }
-    else
-    {
-      /* Use imprecise "estimates" we got with the query plan */
-      using_tmp= using_temporary;
-      using_fs= using_filesort;
-    }
 
     for (uint i=0; i< n_join_tabs; i++)
     {
@@ -882,88 +863,34 @@ void Explain_select::print_explain_json(Explain_query *query,
       }
     }
 
-    Filesort_tracker *first_table_sort= NULL;
-    bool first_table_sort_used= false;
     int started_objects= 0;
+    
+    Explain_aggr_node *node= aggr_tree;
 
-    if (is_analyze)
-    {
-      /* ANALYZE has collected this part of query plan independently */
-      if (ops_tracker.had_varied_executions())
-      {
-        writer->add_member("varied-sort-and-tmp").start_object();
-        started_objects++;
-      }
-      else
-      {
-        Sort_and_group_tracker::Iterator iter(&ops_tracker);
-        enum_qep_action action;
-        Filesort_tracker *fs_tracker;
-
-        while ((action= iter.get_next(&fs_tracker)) != EXPL_ACTION_EOF)
-        {
-          if (action == EXPL_ACTION_FILESORT)
-          {
-            if (iter.is_last_element())
-            {
-              first_table_sort= fs_tracker;
-              break;
-            }
-            writer->add_member("filesort").start_object();
-            started_objects++;
-            fs_tracker->print_json_members(writer);
-          }
-          else if (action == EXPL_ACTION_TEMPTABLE)
-          {
-            writer->add_member("temporary_table").start_object();
-            started_objects++;
-            /*
-            if (tmp == EXPL_TMP_TABLE_BUFFER)
-              func= "buffer";
-            else if (tmp == EXPL_TMP_TABLE_GROUP)
-              func= "group-by";
-            else
-              func= "distinct";
-            writer->add_member("function").add_str(func);
-           */
-          }
-          else if (action == EXPL_ACTION_REMOVE_DUPS)
-          {
-            writer->add_member("duplicate_removal").start_object();
-            started_objects++;
-          }
-          else
-            DBUG_ASSERT(0);
-        }
-      }
-
-      if (first_table_sort)
-        first_table_sort_used= true;
-    }
-    else
+    for (; node; node= node->child)
     {
-      /* This is just EXPLAIN. Try to produce something meaningful */
-      if (using_temporary)
+      switch (node->get_type())
       {
-        started_objects= 1;
-        if (using_filesort)
+        case AGGR_OP_TEMP_TABLE:
+          writer->add_member("temporary_table").start_object();
+          break;
+        case AGGR_OP_FILESORT:
         {
-          started_objects++;
           writer->add_member("filesort").start_object();
+          if (is_analyze)
+            ((Explain_aggr_filesort*)node)->tracker->print_json_members(writer);
+          break;
         }
-        writer->add_member("temporary_table").start_object();
-        writer->add_member("function").add_str("buffer");
-      }
-      else
-      {
-        if (using_filesort)
-          first_table_sort_used= true;
+        case AGGR_OP_REMOVE_DUPLICATES:
+          writer->add_member("duplicate_removal").start_object();
+          break;
+        default:
+          DBUG_ASSERT(0);
       }
+      started_objects++;
     }
     
-    Explain_basic_join::print_explain_json_interns(query, writer, is_analyze,
-                                                   first_table_sort,
-                                                   first_table_sort_used);
+    Explain_basic_join::print_explain_json_interns(query, writer, is_analyze);
 
     for (;started_objects; started_objects--)
       writer->end_object();
@@ -983,7 +910,7 @@ void Explain_basic_join::print_explain_json(Explain_query *query,
   writer->add_member("query_block").start_object();
   writer->add_member("select_id").add_ll(select_id);
   
-  print_explain_json_interns(query, writer, is_analyze, NULL, false);
+  print_explain_json_interns(query, writer, is_analyze);
 
   writer->end_object();
 }
@@ -992,9 +919,7 @@ void Explain_basic_join::print_explain_json(Explain_query *query,
 void Explain_basic_join::
 print_explain_json_interns(Explain_query *query, 
                            Json_writer *writer, 
-                           bool is_analyze,
-                           Filesort_tracker *first_table_sort,
-                           bool first_table_sort_used)
+                           bool is_analyze)
 {
   Json_writer_nesting_guard guard(writer);
   for (uint i=0; i< n_join_tabs; i++)
@@ -1002,12 +927,7 @@ print_explain_json_interns(Explain_query *query,
     if (join_tabs[i]->start_dups_weedout)
       writer->add_member("duplicates_removal").start_object();
 
-    join_tabs[i]->print_explain_json(query, writer, is_analyze,
-                                     first_table_sort,
-                                     first_table_sort_used);
-
-    first_table_sort= NULL;
-    first_table_sort_used= false;
+    join_tabs[i]->print_explain_json(query, writer, is_analyze);
 
     if (join_tabs[i]->end_dups_weedout)
       writer->end_object();
@@ -1299,7 +1219,7 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai
     extra_buf.append(STRING_WITH_LEN("Using temporary"));
   }
 
-  if (using_filesort)
+  if (using_filesort || this->using_filesort)
   {
     if (first)
       first= false;
@@ -1498,13 +1418,11 @@ void add_json_keyset(Json_writer *writer, const char *elem_name,
 
 void Explain_table_access::print_explain_json(Explain_query *query,
                                               Json_writer *writer,
-                                              bool is_analyze,
-                                              Filesort_tracker *fs_tracker,
-                                              bool first_table_sort_used)
+                                              bool is_analyze)
 {
   Json_writer_nesting_guard guard(writer);
   
-  if (first_table_sort_used)
+  if (using_filesort)
   {
     /* filesort was invoked on this join tab before doing the join with the rest */
     writer->add_member("read_sorted_file").start_object();
@@ -1531,6 +1449,7 @@ void Explain_table_access::print_explain_json(Explain_query *query,
       }
     }
     writer->add_member("filesort").start_object();
+
     if (is_analyze)
       fs_tracker->print_json_members(writer);
   }
@@ -1723,7 +1642,7 @@ void Explain_table_access::print_explain_json(Explain_query *query,
     writer->end_object();
   }
 
-  if (first_table_sort_used)
+  if (using_filesort)
   {
     writer->end_object(); // filesort
     writer->end_object(); // read_sorted_file
diff --git a/sql/sql_explain.h b/sql/sql_explain.h
index 844773c4a47..9f8a0361cd5 100644
--- a/sql/sql_explain.h
+++ b/sql/sql_explain.h
@@ -176,9 +176,7 @@ public:
                           bool is_analyze);
 
   void print_explain_json_interns(Explain_query *query, Json_writer *writer,
-                                  bool is_analyze, 
-                                  Filesort_tracker *first_table_sort,
-                                  bool first_table_sort_used);
+                                  bool is_analyze);
 
   /* A flat array of Explain structs for tables. */
   Explain_table_access** join_tabs;
@@ -186,6 +184,7 @@ public:
 };
 
 
+class Explain_aggr_node;
 /*
   EXPLAIN structure for a SELECT.
   
@@ -212,15 +211,9 @@ public:
     having(NULL), having_value(Item::COND_UNDEF),
     using_temporary(false), using_filesort(false),
     time_tracker(is_analyze),
-    ops_tracker(is_analyze)
+    aggr_tree(NULL)
   {}
 
-  /*
-    This is used to save the results of "late" test_if_skip_sort_order() calls
-    that are made from JOIN::exec
-  */
-  void replace_table(uint idx, Explain_table_access *new_tab);
-
 public:
   const char *select_type;
 
@@ -244,9 +237,13 @@ public:
 
   /* ANALYZE members */
   Time_and_counter_tracker time_tracker;
-
-  Sort_and_group_tracker  ops_tracker;
   
+  /* 
+    Part of query plan describing sorting, temp.table usage, and duplicate 
+    removal
+  */
+  Explain_aggr_node* aggr_tree;
+
   int print_explain(Explain_query *query, select_result_sink *output, 
                     uint8 explain_flags, bool is_analyze);
   void print_explain_json(Explain_query *query, Json_writer *writer, 
@@ -260,6 +257,48 @@ private:
   Table_access_tracker using_temporary_read_tracker;
 };
 
+/////////////////////////////////////////////////////////////////////////////
+// EXPLAIN structures for ORDER/GROUP operations.
+/////////////////////////////////////////////////////////////////////////////
+typedef enum 
+{
+  AGGR_OP_TEMP_TABLE,        // printed in JSON as "temporary_table"
+  AGGR_OP_FILESORT,          // printed in JSON as "filesort"
+  //AGGR_OP_READ_SORTED_FILE, // need this?
+  AGGR_OP_REMOVE_DUPLICATES  // printed in JSON as "duplicate_removal"
+  //AGGR_OP_JOIN // Need this?
+} enum_explain_aggr_node_type;
+
+
+class Explain_aggr_node : public Sql_alloc // abstract node of an aggr_tree chain
+{
+public:
+  virtual enum_explain_aggr_node_type get_type()= 0; // operation of this node
+  virtual ~Explain_aggr_node() {}
+  Explain_aggr_node *child; // next node, NULL at end; NOTE(review): not set here
+};
+
+class Explain_aggr_filesort : public Explain_aggr_node 
+{
+public:
+  enum_explain_aggr_node_type get_type() { return AGGR_OP_FILESORT; }
+  Filesort_tracker *tracker; // print_json_members() is called on it for ANALYZE
+};
+
+class Explain_aggr_tmp_table : public Explain_aggr_node // temp. table step
+{
+public:
+  enum_explain_aggr_node_type get_type() { return AGGR_OP_TEMP_TABLE; }
+};
+
+class Explain_aggr_remove_dups : public Explain_aggr_node // duplicate removal
+{
+public:
+  enum_explain_aggr_node_type get_type() { return AGGR_OP_REMOVE_DUPLICATES; }
+};
+
+
+/////////////////////////////////////////////////////////////////////////////
 
 /* 
   Explain structure for a UNION.
@@ -617,7 +656,9 @@ public:
     where_cond(NULL),
     cache_cond(NULL),
     pushed_index_cond(NULL),
-    sjm_nest(NULL)
+    sjm_nest(NULL),
+    using_filesort(false),
+    fs_tracker(NULL)
   {}
   ~Explain_table_access() { delete sjm_nest; }
 
@@ -710,7 +751,9 @@ public:
   Item *pushed_index_cond;
 
   Explain_basic_join *sjm_nest;
-
+   
+  bool using_filesort;
+  Filesort_tracker *fs_tracker;
   /* ANALYZE members */
 
   /* Tracker for reading the table */
@@ -723,9 +766,7 @@ public:
                     uint select_id, const char *select_type,
                     bool using_temporary, bool using_filesort);
   void print_explain_json(Explain_query *query, Json_writer *writer,
-                          bool is_analyze, 
-                          Filesort_tracker *fs_tracker,
-                          bool first_table_sort_used);
+                          bool is_analyze);
 
 private:
   void append_tag_name(String *str, enum explain_extra_tag tag);
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index 9817b882bdd..6e40ac02274 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -258,7 +258,7 @@ static int check_insert_fields(THD *thd, TABLE_LIST *table_list,
     if (table_list->is_view())
       unfix_fields(fields);
 
-    res= setup_fields(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0);
+    res= setup_fields(thd, Ref_ptr_array(), fields, MARK_COLUMNS_WRITE, 0, 0);
 
     /* Restore the current context. */
     ctx_state.restore_state(context, table_list);
@@ -346,7 +346,8 @@ static int check_update_fields(THD *thd, TABLE_LIST *insert_table_list,
   }
 
   /* Check the fields we are going to modify */
-  if (setup_fields(thd, 0, update_fields, MARK_COLUMNS_WRITE, 0, 0))
+  if (setup_fields(thd, Ref_ptr_array(),
+                   update_fields, MARK_COLUMNS_WRITE, 0, 0))
     return -1;
 
   if (insert_table_list->is_view() &&
@@ -771,7 +772,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
       my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), counter);
       goto abort;
     }
-    if (setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0))
+    if (setup_fields(thd, Ref_ptr_array(), *values, MARK_COLUMNS_READ, 0, 0))
       goto abort;
     switch_to_nullable_trigger_fields(*values, table);
   }
@@ -1466,7 +1467,8 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
     table_list->next_local= 0;
     context->resolve_in_table_list_only(table_list);
 
-    res= (setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0) ||
+    res= (setup_fields(thd, Ref_ptr_array(),
+                       *values, MARK_COLUMNS_READ, 0, 0) ||
           check_insert_fields(thd, context->table_list, fields, *values,
                               !insert_into_view, 0, &map));
 
@@ -1482,7 +1484,8 @@ bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list,
     }
 
     if (!res)
-      res= setup_fields(thd, 0, update_values, MARK_COLUMNS_READ, 0, 0);
+      res= setup_fields(thd, Ref_ptr_array(),
+                        update_values, MARK_COLUMNS_READ, 0, 0);
 
     if (!res && duplic == DUP_UPDATE)
     {
@@ -3440,7 +3443,7 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
   */
   lex->current_select= &lex->select_lex;
 
-  res= (setup_fields(thd, 0, values, MARK_COLUMNS_READ, 0, 0) ||
+  res= (setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_READ, 0, 0) ||
         check_insert_fields(thd, table_list, *fields, values,
                             !insert_into_view, 1, &map));
 
@@ -3493,7 +3496,7 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
       table_list->next_name_resolution_table= 
         ctx_state.get_first_name_resolution_table();
 
-    res= res || setup_fields(thd, 0, *info.update_values,
+    res= res || setup_fields(thd, Ref_ptr_array(), *info.update_values,
                              MARK_COLUMNS_READ, 0, 0);
     if (!res)
     {
@@ -3622,7 +3625,7 @@ void select_insert::cleanup()
 select_insert::~select_insert()
 {
   DBUG_ENTER("~select_insert");
-  if (table && table->created)
+  if (table && table->is_created())
   {
     table->next_number_field=0;
     table->auto_increment_field_not_null= FALSE;
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 65257c9b2ce..6056b03f4eb 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -748,6 +748,14 @@ void lex_start(THD *thd)
   lex->stmt_var_list.empty();
   lex->proc_list.elements=0;
 
+  lex->save_group_list.empty();
+  lex->save_order_list.empty();
+  lex->win_ref= NULL;
+  lex->win_frame= NULL;
+  lex->frame_top_bound= NULL;
+  lex->frame_bottom_bound= NULL;
+  lex->win_spec= NULL;
+
   lex->is_lex_started= TRUE;
   DBUG_VOID_RETURN;
 }
@@ -2103,8 +2111,7 @@ void st_select_lex::init_query()
   parent_lex->push_context(&context, parent_lex->thd->mem_root);
   cond_count= between_count= with_wild= 0;
   max_equal_elems= 0;
-  ref_pointer_array= 0;
-  ref_pointer_array_size= 0;
+  ref_pointer_array.reset();
   select_n_where_fields= 0;
   select_n_reserved= 0;
   select_n_having_items= 0;
@@ -2122,8 +2129,11 @@ void st_select_lex::init_query()
   prep_leaf_list_state= UNINIT;
   have_merged_subqueries= FALSE;
   bzero((char*) expr_cache_may_be_used, sizeof(expr_cache_may_be_used));
+  select_list_tables= 0;
   m_non_agg_field_used= false;
   m_agg_func_used= false;
+  window_specs.empty();
+  window_funcs.empty();
 }
 
 void st_select_lex::init_select()
@@ -2650,7 +2660,7 @@ bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num)
                        select_n_having_items +
                        select_n_where_fields +
                        order_group_num) * 5;
-  if (ref_pointer_array != NULL)
+  if (!ref_pointer_array.is_null())
   {
     /*
       We need to take 'n_sum_items' into account when allocating the array,
@@ -2659,17 +2669,24 @@ bool st_select_lex::setup_ref_array(THD *thd, uint order_group_num)
       In the usual case we can reuse the array from the prepare phase.
       If we need a bigger array, we must allocate a new one.
     */
-    if (ref_pointer_array_size >= n_elems)
-    {
-      DBUG_PRINT("info", ("reusing old ref_array"));
+    if (ref_pointer_array.size() == n_elems)
       return false;
-    }
-  }
-  ref_pointer_array= static_cast<Item**>(arena->alloc(sizeof(Item*) * n_elems));
-  if (ref_pointer_array != NULL)
-    ref_pointer_array_size= n_elems;
 
-  return ref_pointer_array == NULL;
+    /*
+      We need to take 'n_sum_items' into account when allocating the array,
+      and this may actually increase during the optimization phase due to
+      MIN/MAX rewrite in Item_in_subselect::single_value_transformer.
+      In the usual case we can reuse the array from the prepare phase.
+      If we need a bigger array, we must allocate a new one.
+     */
+    if (ref_pointer_array.size() == n_elems)
+      return false;
+   }
+  Item **array= static_cast<Item**>(arena->alloc(sizeof(Item*) * n_elems));
+  if (array != NULL)
+    ref_pointer_array= Ref_ptr_array(array, n_elems);
+
+  return array == NULL;
 }
 
 
@@ -2734,8 +2751,8 @@ void st_select_lex::print_order(String *str,
       else
         (*order->item)->print(str, query_type);
     }
-    if (!order->asc)
-      str->append(STRING_WITH_LEN(" desc"));
+    if (order->direction == ORDER::ORDER_DESC)
+       str->append(STRING_WITH_LEN(" desc"));
     if (order->next)
       str->append(',');
   }
@@ -4177,9 +4194,11 @@ void SELECT_LEX::update_used_tables()
 
   Item *item;
   List_iterator_fast<Item> it(join->fields_list);
+  select_list_tables= 0;
   while ((item= it++))
   {
     item->update_used_tables();
+    select_list_tables|= item->used_tables();
   }
   Item_outer_ref *ref;
   List_iterator_fast<Item_outer_ref> ref_it(inner_refs_list);
@@ -4229,6 +4248,8 @@ void st_select_lex::update_correlated_cache()
   if (join->conds)
     is_correlated|= MY_TEST(join->conds->used_tables() & OUTER_REF_TABLE_BIT);
 
+  is_correlated|= join->having_is_correlated;
+
   if (join->having)
     is_correlated|= MY_TEST(join->having->used_tables() & OUTER_REF_TABLE_BIT);
 
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index c64ed6b8d5c..fcfa96023a4 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -28,6 +28,7 @@
 #include "mem_root_array.h"
 #include "sql_cmd.h"
 #include "sql_alter.h"                // Alter_info
+#include "sql_window.h"
 
 /* YACC and LEX Definitions */
 
@@ -47,6 +48,7 @@ class sys_var;
 class Item_func_match;
 class File_parser;
 class Key_part_spec;
+class Item_window_func;
 struct sql_digest_state;
 class With_clause;
 
@@ -721,6 +723,7 @@ public:
 };
 
 typedef class st_select_lex_unit SELECT_LEX_UNIT;
+typedef Bounds_checked_array<Item*> Ref_ptr_array;
 
 /*
   SELECT_LEX - store information of parsed SELECT statment
@@ -799,9 +802,9 @@ public:
   SQL_I_List<ORDER> order_list;   /* ORDER clause */
   SQL_I_List<ORDER> gorder_list;
   Item *select_limit, *offset_limit;  /* LIMIT clause parameters */
-  // Arrays of pointers to top elements of all_fields list
-  Item **ref_pointer_array;
-  size_t ref_pointer_array_size; // Number of elements in array.
+
+  /// Array of pointers to top elements of all_fields list
+  Ref_ptr_array ref_pointer_array;
 
   /*
     number of items in select_list and HAVING clause used to get number
@@ -898,6 +901,12 @@ public:
   */
   List<String> *prev_join_using;
 
+  /**
+    The set of those tables whose fields are referenced in the select list of
+    this select level.
+  */
+  table_map select_list_tables;
+
   /* namp of nesting SELECT visibility (for aggregate functions check) */
   nesting_map name_visibility_map;
 
@@ -1088,6 +1097,22 @@ public:
   }
   With_element *find_table_def_in_with_clauses(TABLE_LIST *table);
 
+  List<Window_spec> window_specs;
+  void prepare_add_window_spec(THD *thd);
+  bool add_window_def(THD *thd, LEX_STRING *win_name, LEX_STRING *win_ref,
+                      SQL_I_List<ORDER> win_partition_list,
+                      SQL_I_List<ORDER> win_order_list,
+                      Window_frame *win_frame);
+  bool add_window_spec(THD *thd, LEX_STRING *win_ref,
+                       SQL_I_List<ORDER> win_partition_list,
+                       SQL_I_List<ORDER> win_order_list,
+                       Window_frame *win_frame);
+  List<Item_window_func> window_funcs;
+  bool add_window_func(Item_window_func *win_func)
+  {
+    return window_funcs.push_back(win_func);
+  }
+
 private:
   bool m_non_agg_field_used;
   bool m_agg_func_used;
@@ -2756,6 +2781,14 @@ public:
   }
 
 
+  SQL_I_List<ORDER> save_group_list;
+  SQL_I_List<ORDER> save_order_list;
+  LEX_STRING *win_ref;
+  Window_frame *win_frame;
+  Window_frame_bound *frame_top_bound;
+  Window_frame_bound *frame_bottom_bound;
+  Window_spec *win_spec;
+
   inline void free_set_stmt_mem_root()
   {
     DBUG_ASSERT(!is_arena_for_set_stmt());
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index d43eb884abd..94b0fe72ac3 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -277,22 +277,25 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
       Let us also prepare SET clause, altough it is probably empty
       in this case.
     */
-    if (setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
-        setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
+    if (setup_fields(thd, Ref_ptr_array(),
+                     set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
+        setup_fields(thd, Ref_ptr_array(), set_values, MARK_COLUMNS_READ, 0, 0))
       DBUG_RETURN(TRUE);
   }
   else
   {						// Part field list
     /* TODO: use this conds for 'WITH CHECK OPTIONS' */
-    if (setup_fields(thd, 0, fields_vars, MARK_COLUMNS_WRITE, 0, 0) ||
-        setup_fields(thd, 0, set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
+    if (setup_fields(thd, Ref_ptr_array(),
+                     fields_vars, MARK_COLUMNS_WRITE, 0, 0) ||
+        setup_fields(thd, Ref_ptr_array(),
+                     set_fields, MARK_COLUMNS_WRITE, 0, 0) ||
         check_that_all_fields_are_given_values(thd, table, table_list))
       DBUG_RETURN(TRUE);
     /* Add all fields with default functions to table->write_set. */
     if (table->default_field)
       table->mark_default_fields_for_write();
     /* Fix the expressions in SET clause */
-    if (setup_fields(thd, 0, set_values, MARK_COLUMNS_READ, 0, 0))
+    if (setup_fields(thd, Ref_ptr_array(), set_values, MARK_COLUMNS_READ, 0, 0))
       DBUG_RETURN(TRUE);
   }
   switch_to_nullable_trigger_fields(fields_vars, table);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 7c50e4ed680..79d284a5952 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -4521,7 +4521,7 @@ end_with_restore_list:
                                                lex->table_count);
       if (result)
       {
-        res= mysql_select(thd, &select_lex->ref_pointer_array,
+        res= mysql_select(thd,
                           select_lex->get_table_list(),
                           select_lex->with_wild,
                           select_lex->item_list,
@@ -7789,7 +7789,6 @@ add_proc_to_list(THD* thd, Item *item)
   item_ptr = (Item**) (order+1);
   *item_ptr= item;
   order->item=item_ptr;
-  order->free_me=0;
   thd->lex->proc_list.link_in_list(order, &order->next);
   return 0;
 }
@@ -7807,8 +7806,7 @@ bool add_to_list(THD *thd, SQL_I_List<ORDER> &list, Item *item,bool asc)
     DBUG_RETURN(1);
   order->item_ptr= item;
   order->item= &order->item_ptr;
-  order->asc = asc;
-  order->free_me=0;
+  order->direction= (asc ? ORDER::ORDER_ASC : ORDER::ORDER_DESC);
   order->used=0;
   order->counter_used= 0;
   order->fast_field_copier_setup= 0; 
@@ -8225,6 +8223,65 @@ TABLE_LIST *st_select_lex::convert_right_join()
   DBUG_RETURN(tab1);
 }
 
+/** Stash group_list/order_list in LEX and reset the window parse fields. */
+void st_select_lex::prepare_add_window_spec(THD *thd)
+{
+  LEX *lex= thd->lex;
+  lex->save_group_list= group_list; // put back by add_window_def()/add_window_spec()
+  lex->save_order_list= order_list;
+  lex->win_ref= NULL;
+  lex->win_frame= NULL;
+  lex->frame_top_bound= NULL;
+  lex->frame_bottom_bound= NULL;
+  group_list.empty(); // presumably reused to collect the window's own lists - confirm in parser
+  order_list.empty();
+}
+/** Build a Window_def, append it to window_specs; returns true on failure. */
+bool st_select_lex::add_window_def(THD *thd,
+                                   LEX_STRING *win_name,
+                                   LEX_STRING *win_ref,
+                                   SQL_I_List<ORDER> win_partition_list,
+                                   SQL_I_List<ORDER> win_order_list,
+                                   Window_frame *win_frame)
+{ /* The by-value list arguments are copied into objects on thd->mem_root. */
+  SQL_I_List<ORDER> *win_part_list_ptr=
+    new (thd->mem_root) SQL_I_List<ORDER> (win_partition_list);
+  SQL_I_List<ORDER> *win_order_list_ptr=
+    new (thd->mem_root) SQL_I_List<ORDER> (win_order_list);
+  if (!(win_part_list_ptr && win_order_list_ptr))
+    return true; // NOTE(review): exits before group_list/order_list are restored
+  Window_def *win_def= new (thd->mem_root) Window_def(win_name,
+                                                      win_ref,
+                                                      win_part_list_ptr,
+                                                      win_order_list_ptr,
+                                                      win_frame);
+  group_list= thd->lex->save_group_list; // values stored by prepare_add_window_spec()
+  order_list= thd->lex->save_order_list;
+  return (win_def == NULL || window_specs.push_back(win_def));
+}
+/** Build a Window_spec, append it to window_specs; returns true on failure. */
+bool st_select_lex::add_window_spec(THD *thd, 
+                                    LEX_STRING *win_ref,
+                                    SQL_I_List<ORDER> win_partition_list,
+                                    SQL_I_List<ORDER> win_order_list,
+                                    Window_frame *win_frame)
+{ /* The by-value list arguments are copied into objects on thd->mem_root. */
+  SQL_I_List<ORDER> *win_part_list_ptr=
+    new (thd->mem_root) SQL_I_List<ORDER> (win_partition_list);
+  SQL_I_List<ORDER> *win_order_list_ptr=
+    new (thd->mem_root) SQL_I_List<ORDER> (win_order_list);
+  if (!(win_part_list_ptr && win_order_list_ptr))
+    return true; // NOTE(review): exits before group_list/order_list are restored
+  Window_spec *win_spec= new (thd->mem_root) Window_spec(win_ref,
+                                                         win_part_list_ptr,
+                                                         win_order_list_ptr,
+                                                         win_frame);
+  group_list= thd->lex->save_group_list; // values stored by prepare_add_window_spec()
+  order_list= thd->lex->save_order_list;
+  thd->lex->win_spec= win_spec; // assigned even when NULL; NULL is reported via the return
+  return (win_spec == NULL || window_specs.push_back(win_spec));
+}
+
 /**
   Set lock for all tables in current select level.
 
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 8e5ab71288d..2d6a7302afc 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -1316,7 +1316,7 @@ static bool mysql_test_insert(Prepared_statement *stmt,
         my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), counter);
         goto error;
       }
-      if (setup_fields(thd, 0, *values, MARK_COLUMNS_NONE, 0, 0))
+      if (setup_fields(thd, Ref_ptr_array(), *values, MARK_COLUMNS_NONE, 0, 0))
         goto error;
     }
   }
@@ -1406,7 +1406,8 @@ static int mysql_test_update(Prepared_statement *stmt,
   table_list->register_want_access(want_privilege);
 #endif
   thd->lex->select_lex.no_wrap_view_item= TRUE;
-  res= setup_fields(thd, 0, select->item_list, MARK_COLUMNS_READ, 0, 0);
+  res= setup_fields(thd, Ref_ptr_array(),
+                    select->item_list, MARK_COLUMNS_READ, 0, 0);
   thd->lex->select_lex.no_wrap_view_item= FALSE;
   if (res)
     goto error;
@@ -1417,7 +1418,8 @@ static int mysql_test_update(Prepared_statement *stmt,
     (SELECT_ACL & ~table_list->table->grant.privilege);
   table_list->register_want_access(SELECT_ACL);
 #endif
-  if (setup_fields(thd, 0, stmt->lex->value_list, MARK_COLUMNS_NONE, 0, 0) ||
+  if (setup_fields(thd, Ref_ptr_array(),
+                   stmt->lex->value_list, MARK_COLUMNS_NONE, 0, 0) ||
       check_unique_table(thd, table_list))
     goto error;
   /* TODO: here we should send types of placeholders to the client. */
@@ -1463,7 +1465,7 @@ static bool mysql_test_delete(Prepared_statement *stmt,
     my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE");
     goto error;
   }
-  if (!table_list->table || !table_list->table->created)
+  if (!table_list->table || !table_list->table->is_created())
   {
     my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
              table_list->view_db.str, table_list->view_name.str);
@@ -1589,7 +1591,8 @@ static bool mysql_test_do_fields(Prepared_statement *stmt,
   if (open_normal_and_derived_tables(thd, tables, MYSQL_OPEN_FORCE_SHARED_MDL,
                                      DT_PREPARE | DT_CREATE))
     DBUG_RETURN(TRUE);
-  DBUG_RETURN(setup_fields(thd, 0, *values, MARK_COLUMNS_NONE, 0, 0));
+  DBUG_RETURN(setup_fields(thd, Ref_ptr_array(),
+                           *values, MARK_COLUMNS_NONE, 0, 0));
 }
 
 
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index cc56daacf3e..b15a80a889a 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -344,6 +344,7 @@ enum enum_parsing_place
   IN_WHERE,
   IN_ON,
   IN_GROUP_BY,
+  IN_ORDER_BY,
   PARSING_PLACE_SIZE /* always should be the last */
 };
 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c1916aae58a..cc1310f1632 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -54,6 +54,7 @@
 #include "sql_derived.h"
 #include "sql_statistics.h"
 #include "sql_cte.h"
+#include "sql_window.h"
 
 #include "debug_sync.h"          // DEBUG_SYNC
 #include <m_ctype.h>
@@ -117,7 +118,6 @@ static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void
 static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
 C_MODE_END
 static uint cache_record_length(JOIN *join,uint index);
-bool get_best_combination(JOIN *join);
 static store_key *get_store_key(THD *thd,
 				KEYUSE *keyuse, table_map used_tables,
 				KEY_PART_INFO *key_part, uchar *key_buff,
@@ -164,8 +164,11 @@ static COND *optimize_cond(JOIN *join, COND *conds,
                            COND_EQUAL **cond_equal,
                            int flags= 0);
 bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
-static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
-		     Procedure *proc);
+static int do_select(JOIN *join, Procedure *procedure);
+static bool instantiate_tmp_table(TABLE *table, KEY *keyinfo, 
+                                  MARIA_COLUMNDEF *start_recinfo,
+                                  MARIA_COLUMNDEF **recinfo, 
+                                  ulonglong options);
 
 static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int);
 static enum_nested_loop_state
@@ -179,7 +182,6 @@ end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 static enum_nested_loop_state
 end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
 
-static int test_if_group_changed(List<Cached_item> &list);
 static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos);
 static int join_read_system(JOIN_TAB *tab);
 static int join_read_const(JOIN_TAB *tab);
@@ -232,11 +234,7 @@ static bool list_contains_unique_index(TABLE *table,
                           bool (*find_func) (Field *, void *), void *data);
 static bool find_field_in_item_list (Field *field, void *data);
 static bool find_field_in_order_list (Field *field, void *data);
-static int create_sort_index(THD *thd, JOIN *join, ORDER *order,
-			     ha_rows filesort_limit, ha_rows select_limit,
-                             bool is_order_by);
-static int remove_duplicates(JOIN *join,TABLE *entry,List<Item> &fields,
-			     Item *having);
+int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort);
 static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
 				   Item *having);
 static int remove_dup_with_hash_index(THD *thd,TABLE *table,
@@ -245,7 +243,7 @@ static int remove_dup_with_hash_index(THD *thd,TABLE *table,
 static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
 static bool setup_new_fields(THD *thd, List<Item> &fields,
 			     List<Item> &all_fields, ORDER *new_order);
-static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
+static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
                                     ORDER *order, List<Item> &fields,
                                     List<Item> &all_fields,
 				    bool *all_order_by_fields_used);
@@ -256,12 +254,12 @@ static void calc_group_buffer(JOIN *join,ORDER *group);
 static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
 static bool alloc_group_fields(JOIN *join,ORDER *group);
 // Create list for using with tempory table
-static bool change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array,
+static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
 				     List<Item> &new_list1,
 				     List<Item> &new_list2,
 				     uint elements, List<Item> &items);
 // Create list for using with tempory table
-static bool change_refs_to_tmp_fields(THD *thd, Item **ref_pointer_array,
+static bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
 				      List<Item> &new_list1,
 				      List<Item> &new_list2,
 				      uint elements, List<Item> &items);
@@ -288,7 +286,7 @@ static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
                                         uint n_top_tabs_count, JOIN_TAB *tab);
 static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
                                      table_map rem_tables);
-
+void set_postjoin_aggr_write_func(JOIN_TAB *tab);
 #ifndef DBUG_OFF
 
 /*
@@ -364,7 +362,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
       every PS/SP execution new, we will not need reset this flag if 
       setup_tables_done_option changed for next rexecution
     */
-    res= mysql_select(thd, &select_lex->ref_pointer_array,
+    res= mysql_select(thd,
 		      select_lex->table_list.first,
 		      select_lex->with_wild, select_lex->item_list,
 		      select_lex->where,
@@ -455,7 +453,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
 
 bool
 fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
-                 Item **ref_pointer_array)
+               Ref_ptr_array ref_pointer_array)
 {
   Item_outer_ref *ref;
 
@@ -484,10 +482,9 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
       existing one. The change will lead to less operations for copying fields,
       smaller temporary tables and less data passed through filesort.
     */
-    if (ref_pointer_array && !ref->found_in_select_list)
+    if (!ref_pointer_array.is_null() && !ref->found_in_select_list)
     {
       int el= all_fields.elements;
-      DBUG_ASSERT(all_fields.elements <= select->ref_pointer_array_size);
       ref_pointer_array[el]= item;
       /* Add the field item to the select list of the current select. */
       all_fields.push_front(item, thd->mem_root);
@@ -495,7 +492,7 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
         If it's needed reset each Item_ref item that refers this field with
         a new reference taken from ref_pointer_array.
       */
-      item_ref= ref_pointer_array + el;
+      item_ref= &ref_pointer_array[el];
     }
 
     if (ref->in_sum_func)
@@ -533,6 +530,7 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
     if (!ref->fixed && ref->fix_fields(thd, 0))
       return TRUE;
     thd->lex->used_tables|= item->used_tables();
+    thd->lex->current_select->select_list_tables|= item->used_tables();
   }
   return false;
 }
@@ -615,22 +613,26 @@ void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex)
 /**
   Function to setup clauses without sum functions.
 */
-inline int setup_without_group(THD *thd, Item **ref_pointer_array,
-			       TABLE_LIST *tables,
-			       List<TABLE_LIST> &leaves,
-			       List<Item> &fields,
-			       List<Item> &all_fields,
-			       COND **conds,
-			       ORDER *order,
-			       ORDER *group,
-                               bool *hidden_group_fields,
-                               uint *reserved)
+static inline int
+setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
+                              TABLE_LIST *tables,
+                              List<TABLE_LIST> &leaves,
+                              List<Item> &fields,
+                              List<Item> &all_fields,
+                              COND **conds,
+                              ORDER *order,
+                              ORDER *group,
+                              List<Window_spec> &win_specs,
+		              List<Item_window_func> &win_funcs,
+                              bool *hidden_group_fields,
+                              uint *reserved)
 {
   int res;
+  enum_parsing_place save_place;
   st_select_lex *const select= thd->lex->current_select;
   nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
   /* 
-    Need to save the value, so we can turn off only any new non_agg_field_used
+    Need to save the value, so we can turn off only any new non_agg_field_used
     additions coming from the WHERE
   */
   const bool saved_non_agg_field_used= select->non_agg_field_used();
@@ -650,11 +652,21 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array,
   select->set_non_agg_field_used(saved_non_agg_field_used);
 
   thd->lex->allow_sum_func|= (nesting_map)1 << select->nest_level;
+  
+  save_place= thd->lex->current_select->parsing_place;
+  thd->lex->current_select->parsing_place= IN_ORDER_BY;
   res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields,
                           order);
-  thd->lex->allow_sum_func&= ~((nesting_map)1 << select->nest_level);
+  thd->lex->current_select->parsing_place= save_place;
+   thd->lex->allow_sum_func&= ~((nesting_map)1 << select->nest_level);
+  save_place= thd->lex->current_select->parsing_place;
+  thd->lex->current_select->parsing_place= IN_GROUP_BY;
   res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields,
                           group, hidden_group_fields);
+  thd->lex->current_select->parsing_place= save_place;
+  thd->lex->allow_sum_func|= (nesting_map)1 << select->nest_level;
+  res= res || setup_windows(thd, ref_pointer_array, tables, fields, all_fields,
+                            win_specs, win_funcs);
   thd->lex->allow_sum_func= save_allow_sum_func;
   DBUG_RETURN(res);
 }
@@ -678,8 +690,7 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array,
     0   on success
 */
 int
-JOIN::prepare(Item ***rref_pointer_array,
-	      TABLE_LIST *tables_init,
+JOIN::prepare(TABLE_LIST *tables_init,
 	      uint wild_num, COND *conds_init, uint og_num,
 	      ORDER *order_init, bool skip_order_by,
               ORDER *group_init, Item *having_init,
@@ -783,24 +794,33 @@ JOIN::prepare(Item ***rref_pointer_array,
       tbl->table->maybe_null= 1;
   }
 
-  if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields,
-                              wild_num)) ||
-      select_lex->setup_ref_array(thd, og_num) ||
-      setup_fields(thd, (*rref_pointer_array), fields_list, MARK_COLUMNS_READ,
-		   &all_fields, 1) ||
-      setup_without_group(thd, (*rref_pointer_array), tables_list,
-			  select_lex->leaf_tables, fields_list,
-			  all_fields, &conds, order, group_list,
-			  &hidden_group_fields, &select_lex->select_n_reserved))
-    DBUG_RETURN(-1);				/* purecov: inspected */
+  if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num))
+    DBUG_RETURN(-1);
+  if (select_lex->setup_ref_array(thd, og_num))
+    DBUG_RETURN(-1);
 
-  ref_pointer_array= *rref_pointer_array;
+  ref_ptrs= ref_ptr_array_slice(0);
+  
+  enum_parsing_place save_place= thd->lex->current_select->parsing_place;
+  thd->lex->current_select->parsing_place= SELECT_LIST;
+  if (setup_fields(thd, ref_ptrs, fields_list, MARK_COLUMNS_READ,
+                   &all_fields, 1))
+    DBUG_RETURN(-1);
+  thd->lex->current_select->parsing_place= save_place;
 
+  if (setup_without_group(thd, ref_ptrs, tables_list,
+                          select_lex->leaf_tables, fields_list,
+                          all_fields, &conds, order, group_list,
+                          select_lex->window_specs,
+                          select_lex->window_funcs,
+                          &hidden_group_fields,
+                          &select_lex->select_n_reserved))
+    DBUG_RETURN(-1);
   /* Resolve the ORDER BY that was skipped, then remove it. */
   if (skip_order_by && select_lex !=
                        select_lex->master_unit()->global_parameters())
   {
-    if (setup_order(thd, (*rref_pointer_array), tables_list, fields_list,
+    if (setup_order(thd, ref_ptrs, tables_list, fields_list,
                     all_fields, select_lex->order_list.first))
       DBUG_RETURN(-1);
     select_lex->order_list.empty();
@@ -828,6 +848,12 @@ JOIN::prepare(Item ***rref_pointer_array,
     if (having_fix_rc || thd->is_error())
       DBUG_RETURN(-1);				/* purecov: inspected */
     thd->lex->allow_sum_func= save_allow_sum_func;
+
+    if (having->with_window_func)
+    {
+      my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
+      DBUG_RETURN(-1); 
+    }
   }
 
   With_clause *with_clause=select_lex->get_with_clause();
@@ -867,14 +893,14 @@ JOIN::prepare(Item ***rref_pointer_array,
         real_order= TRUE;
 
       if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM)
-        item->split_sum_func(thd, ref_pointer_array, all_fields, 0);
+        item->split_sum_func(thd, ref_ptrs, all_fields, 0);
     }
     if (!real_order)
       order= NULL;
   }
 
   if (having && having->with_sum_func)
-    having->split_sum_func2(thd, ref_pointer_array, all_fields,
+    having->split_sum_func2(thd, ref_ptrs, all_fields,
                             &having, SPLIT_SUM_SKIP_REGISTERED);
   if (select_lex->inner_sum_func_list)
   {
@@ -883,13 +909,13 @@ JOIN::prepare(Item ***rref_pointer_array,
     do
     { 
       item_sum= item_sum->next;
-      item_sum->split_sum_func2(thd, ref_pointer_array,
+      item_sum->split_sum_func2(thd, ref_ptrs,
                                 all_fields, item_sum->ref_by, 0);
     } while (item_sum != end);
   }
 
   if (select_lex->inner_refs_list.elements &&
-      fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array))
+      fix_inner_refs(thd, all_fields, select_lex, ref_ptrs))
     DBUG_RETURN(-1);
 
   if (group_list)
@@ -907,10 +933,9 @@ JOIN::prepare(Item ***rref_pointer_array,
       {
         Item_field *field= new (thd->mem_root) Item_field(thd, *(Item_field**)ord->item);
         int el= all_fields.elements;
-        DBUG_ASSERT(all_fields.elements <= select_lex->ref_pointer_array_size);
-        ref_pointer_array[el]= field;
+        ref_ptrs[el]= field;
         all_fields.push_front(field, thd->mem_root);
-        ord->item= ref_pointer_array + el;
+        ord->item= &ref_ptrs[el];
       }
     }
   }
@@ -1006,11 +1031,14 @@ bool JOIN::prepare_stage2()
 
   /* Init join struct */
   count_field_types(select_lex, &tmp_table_param, all_fields, 0);
-  ref_pointer_array_size= all_fields.elements*sizeof(Item*);
   this->group= group_list != 0;
 
   if (tmp_table_param.sum_func_count && !group_list)
+  {
     implicit_grouping= TRUE;
+    // Result will contain zero or one row - ordering is meaningless
+    order= NULL;
+  }
 
 #ifdef RESTRICTED_GROUP
   if (implicit_grouping)
@@ -1056,6 +1084,24 @@ int JOIN::optimize()
                       need_tmp,
                       !skip_sort_order && !no_order && (order || group_list),
                       select_distinct);
+    uint select_nr= select_lex->select_number;
+    JOIN_TAB *curr_tab= join_tab + top_join_tab_count;
+    for (uint i= 0; i < aggr_tables; i++, curr_tab++)
+    {
+      if (select_nr == INT_MAX) 
+      {
+        /* this is a fake_select_lex of a union */
+        select_nr= select_lex->master_unit()->first_select()->select_number;
+        curr_tab->tracker= thd->lex->explain->get_union(select_nr)->
+                           get_tmptable_read_tracker();
+      }
+      else
+      {
+        curr_tab->tracker= thd->lex->explain->get_select(select_nr)->
+                           get_using_temporary_read_tracker();
+      }
+    }
+    
   }
   return res;
 }
@@ -1344,6 +1390,8 @@ JOIN::optimize_inner()
   {
     DBUG_PRINT("info",("No tables"));
     error= 0;
+    if (make_aggr_tables_info())
+      DBUG_RETURN(1);
     goto setup_subq_exit;
   }
   error= -1;					// Error is sent to client
@@ -1355,7 +1403,6 @@ JOIN::optimize_inner()
     calling make_join_statistics() as this may call get_best_group_min_max()
     which needs a simplfied group_list.
   */
-  simple_group= 1;
   if (group_list && table_count == 1)
   {
     group_list= remove_const(this, group_list, conds,
@@ -1688,16 +1735,20 @@ JOIN::optimize_inner()
     tab= &join_tab[const_tables];
     if (order)
     {
-      skip_sort_order= test_if_skip_sort_order(tab, order, select_limit, 1, 
-        &tab->table->keys_in_use_for_order_by);
+      skip_sort_order=
+        test_if_skip_sort_order(tab, order, select_limit,
+                                true,           // no_changes
+                                &tab->table->keys_in_use_for_order_by);
     }
     if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array,
                                           order, fields_list, all_fields,
 				          &all_order_fields_used)))
     {
-      bool skip_group= (skip_sort_order &&
-        test_if_skip_sort_order(tab, group_list, select_limit, 1, 
-                                &tab->table->keys_in_use_for_group_by) != 0);
+      const bool skip_group=
+        skip_sort_order &&
+        test_if_skip_sort_order(tab, group_list, select_limit,
+                                  true,         // no_changes
+                                  &tab->table->keys_in_use_for_group_by);
       count_field_types(select_lex, &tmp_table_param, all_fields, 0);
       if ((skip_group && all_order_fields_used) ||
 	  select_limit == HA_POS_ERROR ||
@@ -1726,6 +1777,7 @@ JOIN::optimize_inner()
     else if (thd->is_fatal_error)			// End of memory
       DBUG_RETURN(1);
   }
+  simple_group= rollup.state == ROLLUP::STATE_NONE;
   if (group)
   {
     /*
@@ -1749,6 +1801,7 @@ JOIN::optimize_inner()
       group_optimized_away= 1;
     }
   }
+
   calc_group_buffer(this, group_list);
   send_group_parts= tmp_table_param.group_parts; /* Save org parts */
   if (procedure && procedure->group)
@@ -1790,6 +1843,10 @@ JOIN::optimize_inner()
   }
 
   need_tmp= test_if_need_tmp_table();
+  //TODO this could probably go in test_if_need_tmp_table.
+  if (this->select_lex->window_specs.elements > 0) {
+    need_tmp= TRUE;
+  }
 
   /*
     If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
@@ -1813,6 +1870,31 @@ JOIN::optimize_inner()
   if (!(select_options & SELECT_DESCRIBE))
     init_ftfuncs(thd, select_lex, MY_TEST(order));
 
+  /*
+    It's necessary to check const part of HAVING cond as
+    there is a chance that some cond parts may become
+    const items after make_join_statistics() (for example
+    when Item is a reference to const table field from
+    outer join).
+    This check is performed only for those conditions
+    which do not use aggregate functions. In such case
+    temporary table may not be used and const condition
+    elements may be lost during further having
+    condition transformation in JOIN::exec.
+  */
+  if (having && const_table_map && !having->with_sum_func)
+  {
+    having->update_used_tables();
+    having= having->remove_eq_conds(thd, &select_lex->having_value, true);
+    if (select_lex->having_value == Item::COND_FALSE)
+    {
+      having= new (thd->mem_root) Item_int(thd, (longlong) 0,1);
+      zero_result_cause= "Impossible HAVING noticed after reading const tables";
+      error= 0;
+      DBUG_RETURN(0);
+    }
+  }
+
   if (optimize_unflattened_subqueries())
     DBUG_RETURN(1);
   
@@ -1839,8 +1921,28 @@ JOIN::optimize_inner()
 
   DBUG_EXECUTE("info",TEST_join(this););
 
-  if (const_tables != table_count)
+  if (!only_const_tables())
   {
+     JOIN_TAB *tab= &join_tab[const_tables];
+
+    if (order)
+    {
+      /*
+        Force using of tmp table if sorting by a SP or UDF function due to
+        their expensive and probably non-deterministic nature.
+      */
+      for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
+      {
+        Item *item= *tmp_order->item;
+        if (item->is_expensive())
+        {
+          /* Force tmp table without sort */
+          need_tmp=1; simple_order=simple_group=0;
+          break;
+        }
+      }
+    }
+
     /*
       Because filesort always does a full table scan or a quick range scan
       we must add the removed reference to the select for the table.
@@ -1848,72 +1950,199 @@ JOIN::optimize_inner()
       as in other cases the join is done before the sort.
     */
     if ((order || group_list) &&
-        join_tab[const_tables].type != JT_ALL &&
-        join_tab[const_tables].type != JT_FT &&
-        join_tab[const_tables].type != JT_REF_OR_NULL &&
+        tab->type != JT_ALL &&
+        tab->type != JT_FT &&
+        tab->type != JT_REF_OR_NULL &&
         ((order && simple_order) || (group_list && simple_group)))
     {
-      if (add_ref_to_table_cond(thd,&join_tab[const_tables])) {
+      if (add_ref_to_table_cond(thd,tab)) {
         DBUG_RETURN(1);
       }
     }
     /*
-      Calculate a possible 'limit' of table rows for 'GROUP BY': 'need_tmp'
-      implies that there will be more postprocessing so the specified
-      'limit' should not be enforced yet in the call to
-      'test_if_skip_sort_order'.
+      Investigate whether we may use an ordered index as part of either
+      DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
+      used for only the first of any of these terms to be executed. This
+      is reflected in the order which we check for test_if_skip_sort_order()
+      below. However we do not check for DISTINCT here, as it would have
+      been transformed to a GROUP BY at this stage if it is a candidate for 
+      ordered index optimization.
+      If a decision was made to use an ordered index, the availability
+      of such an access path is stored in 'ordered_index_usage' for later
+      use by 'execute' or 'explain'
     */
-    const ha_rows limit = need_tmp ? HA_POS_ERROR : unit->select_limit_cnt;
+    DBUG_ASSERT(ordered_index_usage == ordered_index_void);
 
-    if (!(select_options & SELECT_BIG_RESULT) &&
-        ((group_list &&
-          (!simple_group ||
-           !test_if_skip_sort_order(&join_tab[const_tables], group_list,
-                                    limit, 0,
-                                    &join_tab[const_tables].table->
-                                    keys_in_use_for_group_by))) ||
-         select_distinct) &&
-        tmp_table_param.quick_group && !procedure)
-    {
-      need_tmp=1; simple_order=simple_group=0;	// Force tmp table without sort
-    }
-    if (order)
+    if (group_list)   // GROUP BY honoured first
+                      // (DISTINCT was rewritten to GROUP BY if skippable)
     {
       /*
-        Do we need a temporary table due to the ORDER BY not being equal to
-        the GROUP BY? The call to test_if_skip_sort_order above tests for the
-        GROUP BY clause only and hence is not valid in this case. So the
-        estimated number of rows to be read from the first table is not valid.
-        We clear it here so that it doesn't show up in EXPLAIN.
-       */
-      if (need_tmp && (select_options & SELECT_DESCRIBE) != 0)
-        join_tab[const_tables].limit= 0;
-      /*
-        Force using of tmp table if sorting by a SP or UDF function due to
-        their expensive and probably non-deterministic nature.
+        When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
+        and thus force sorting on disk unless a group min-max optimization
+        is going to be used as it is applied now only for one table queries
+        with covering indexes.
       */
-      for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
-      {
-        Item *item= *tmp_order->item;
-        if (item->is_expensive())
+      if (!(select_options & SELECT_BIG_RESULT) ||
+            (tab->select &&
+             tab->select->quick &&
+             tab->select->quick->get_type() ==
+             QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
+      {
+        if (simple_group &&              // GROUP BY is possibly skippable
+            !select_distinct)            // .. if not preceded by a DISTINCT
         {
-          /* Force tmp table without sort */
-          need_tmp=1; simple_order=simple_group=0;
-          break;
+          /*
+            Calculate a possible 'limit' of table rows for 'GROUP BY':
+            A specified 'LIMIT' is relative to the final resultset.
+            'need_tmp' implies that there will be more postprocessing 
+            so the specified 'limit' should not be enforced yet.
+           */
+          const ha_rows limit = need_tmp ? HA_POS_ERROR : select_limit;
+          if (test_if_skip_sort_order(tab, group_list, limit, false, 
+                                      &tab->table->keys_in_use_for_group_by))
+          {
+            ordered_index_usage= ordered_index_group_by;
+          }
+        }
+
+	/*
+	  If we are going to use semi-join LooseScan, it will depend
+	  on the selected index scan to be used.  If index is not used
+	  for the GROUP BY, we risk that sorting is put on the LooseScan
+	  table.  In order to avoid this, force use of temporary table.
+	  TODO: Explain the quick_group part of the test below.
+	 */
+        if ((ordered_index_usage != ordered_index_group_by) &&
+            (tmp_table_param.quick_group && !procedure || 
+	     (tab->emb_sj_nest && 
+	      best_positions[const_tables].sj_strategy == SJ_OPT_LOOSE_SCAN)))
+        {
+          need_tmp=1;
+          simple_order= simple_group= false; // Force tmp table without sort
         }
       }
     }
-  }
+    else if (order &&                      // ORDER BY w/o preceding GROUP BY
+             (simple_order || skip_sort_order)) // which is possibly skippable
+    {
+      if (test_if_skip_sort_order(tab, order, select_limit, false, 
+                                  &tab->table->keys_in_use_for_order_by))
+      {
+        ordered_index_usage= ordered_index_order_by;
+      }
+    }
+  }  
+
+  if (having)
+    having_is_correlated= MY_TEST(having->used_tables() & OUTER_REF_TABLE_BIT);
+  tmp_having= having;
 
   if ((select_lex->options & OPTION_SCHEMA_TABLE))
     optimize_schema_tables_reads(this);
 
   /*
+    The loose index scan access method guarantees that all grouping or
+    duplicate row elimination (for distinct) is already performed
+    during data retrieval, and that all MIN/MAX functions are already
+    computed for each group. Thus all MIN/MAX functions should be
+    treated as regular functions, and there is no need to perform
+    grouping in the main execution loop.
+    Notice that currently loose index scan is applicable only for
+    single table queries, thus it is sufficient to test only the first
+    join_tab element of the plan for its access method.
+  */
+  if (join_tab->is_using_loose_index_scan())
+  {
+    tmp_table_param.precomputed_group_by= TRUE;
+    if (join_tab->is_using_agg_loose_index_scan())
+    {
+      need_distinct= FALSE;
+      tmp_table_param.precomputed_group_by= FALSE;
+    }
+  }
+
+  if (make_aggr_tables_info())
+    DBUG_RETURN(1);
+
+  error= 0;
+
+  if (select_options & SELECT_DESCRIBE)
+    goto derived_exit;
+
+  DBUG_RETURN(0);
+
+setup_subq_exit:
+  /* Choose an execution strategy for this JOIN. */
+  if (!tables_list || !table_count)
+    choose_tableless_subquery_plan();
+  /*
+    Even with zero matching rows, subqueries in the HAVING clause may
+    need to be evaluated if there are aggregate functions in the query.
+  */
+  if (optimize_unflattened_subqueries())
+    DBUG_RETURN(1);
+  error= 0;
+
+derived_exit:
+
+  select_lex->mark_const_derived(zero_result_cause);
+  DBUG_RETURN(0);
+}
+
+
+/**
+  Set info for aggregation tables
+
+  @details
+  This function finalizes the execution plan by taking the following actions:
+    .) aggregation temporary tables are created, but not instantiated
+       (this is done during execution).
+       JOIN_TABs for aggregation tables are set appropriately
+       (see JOIN::create_postjoin_aggr_table).
+    .) field lists (fields, all_fields, ref_pointer_array slices) are prepared
+       for each required stage of execution. These field lists are set for
+       working tables' tabs and for the tab of the last table in the join.
+    .) info for sorting/grouping/duplicate removal is prepared and saved in
+       the appropriate tabs.
+
+  @returns
+  false - Ok
+  true  - Error
+*/
+
+bool JOIN::make_aggr_tables_info()
+{
+  List<Item> *curr_all_fields= &all_fields;
+  List<Item> *curr_fields_list= &fields_list;
+  JOIN_TAB *curr_tab= join_tab + const_tables;
+  bool materialize_join= false;
+  TABLE *exec_tmp_table= NULL;
+  bool distinct= false;
+  bool keep_row_order= false;
+  DBUG_ENTER("JOIN::make_aggr_tables_info");
+
+  const bool has_group_by= this->group;
+  
+  sort_and_group_aggr_tab= NULL;
+  
+
+  /*
+    Setup last table to provide fields and all_fields lists to the next
+    node in the plan.
+  */
+  if (join_tab)
+  {
+    join_tab[top_join_tab_count - 1].fields= &fields_list;
+    join_tab[top_join_tab_count - 1].all_fields= &all_fields;
+  }
+
+  /*
     All optimization is done. Check if we can use the storage engines
     group by handler to evaluate the group by
   */
-
-  if ((tmp_table_param.sum_func_count || group_list) && !procedure)
+  group_by_handler *gbh= NULL;
+  if (tables_list && (tmp_table_param.sum_func_count || group_list) &&
+      !procedure)
   {
     /*
       At the moment we only support push down for queries where
@@ -1933,24 +2162,39 @@ JOIN::optimize_inner()
       Query query= {&all_fields, select_distinct, tables_list, conds,
                     group_list, order ? order : group_list, having};
       group_by_handler *gbh= ht->create_group_by(thd, &query);
+
       if (gbh)
       {
         pushdown_query= new (thd->mem_root) Pushdown_query(select_lex, gbh);
-
         /*
           We must store rows in the tmp table if we need to do an ORDER BY
           or DISTINCT and the storage handler can't handle it.
         */
         need_tmp= query.order_by || query.group_by || query.distinct;
-        tmp_table_param.hidden_field_count= (all_fields.elements -
-                                             fields_list.elements);
-        if (!(exec_tmp_table1=
-              create_tmp_table(thd, &tmp_table_param, all_fields, 0,
-                               query.distinct, 1,
-                               select_options, HA_POS_ERROR, "",
-                               !need_tmp, query.order_by || query.group_by)))
+        distinct= query.distinct;
+        keep_row_order= query.order_by || query.group_by;
+        
+        order= query.order_by;
+
+        aggr_tables++;
+        curr_tab= join_tab + top_join_tab_count;
+        bzero(curr_tab, sizeof(JOIN_TAB));
+        curr_tab->ref.key= -1;
+        curr_tab->join= this;
+
+        curr_tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param);
+        TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param,
+                                       all_fields,
+                                       NULL, query.distinct,
+                                       TRUE, select_options, HA_POS_ERROR,
+                                       "", !need_tmp,
+                                       query.order_by || query.group_by);
+        if (!table)
           DBUG_RETURN(1);
 
+        curr_tab->aggr= new (thd->mem_root) AGGR_OP(curr_tab);
+        curr_tab->aggr->set_write_func(::end_send);
+        curr_tab->table= table;
         /*
           Setup reference fields, used by summary functions and group by fields,
           to point to the temporary table.
@@ -1959,17 +2203,18 @@ JOIN::optimize_inner()
           set_items_ref_array(items1).
         */
         init_items_ref_array();
-        items1= items0 + all_fields.elements;
+        items1= ref_ptr_array_slice(2);
+        //items1= items0 + all_fields.elements;
         if (change_to_use_tmp_fields(thd, items1,
                                      tmp_fields_list1, tmp_all_fields1,
                                      fields_list.elements, all_fields))
           DBUG_RETURN(1);
 
         /* Give storage engine access to temporary table */
-        gbh->table= exec_tmp_table1;
-
+        gbh->table= table;
         pushdown_query->store_data_in_temp_table= need_tmp;
         pushdown_query->having= having;
+
         /*
           Group by and having is calculated by the group_by handler.
           Reset the group by and having
@@ -1986,21 +2231,19 @@ JOIN::optimize_inner()
         tmp_table_param.field_count+= tmp_table_param.sum_func_count;
         tmp_table_param.sum_func_count= 0;
 
-        /* Remember information about the original join */
-        original_join_tab= join_tab;
-        original_table_count= table_count;
+        fields= curr_fields_list;
 
-        /* Set up one join tab to get sorting to work */
-        const_tables= 0;
-        table_count= 1;
-        join_tab= (JOIN_TAB*) thd->calloc(sizeof(JOIN_TAB));
-        join_tab[0].table= exec_tmp_table1;
+        //todo: new:
+        curr_tab->ref_array= &items1;
+        curr_tab->all_fields= &tmp_all_fields1;
+        curr_tab->fields= &tmp_fields_list1;
 
         DBUG_RETURN(thd->is_fatal_error);
       }
     }
   }
 
+
   /*
     The loose index scan access method guarantees that all grouping or
     duplicate row elimination (for distinct) is already performed
@@ -2012,194 +2255,576 @@ JOIN::optimize_inner()
     single table queries, thus it is sufficient to test only the first
     join_tab element of the plan for its access method.
   */
-  if (join_tab->is_using_loose_index_scan())
+  if (join_tab && join_tab->is_using_loose_index_scan())
+    tmp_table_param.precomputed_group_by=
+      !join_tab->is_using_agg_loose_index_scan();
+
+  group_list_for_estimates= group_list;
+  /* Create a tmp table if distinct or if the sort is too complicated */
+  if (need_tmp)
   {
-    tmp_table_param.precomputed_group_by= TRUE;
-    if (join_tab->is_using_agg_loose_index_scan())
+    aggr_tables++;
+    curr_tab= join_tab + top_join_tab_count;
+    bzero(curr_tab, sizeof(JOIN_TAB));
+    curr_tab->ref.key= -1;
+    if (only_const_tables())
+      first_select= sub_select_postjoin_aggr;
+
+    /*
+      Create temporary table on first execution of this join.
+      (Will be reused if this is a subquery that is executed several times.)
+    */
+    init_items_ref_array();
+
+    ORDER *tmp_group= (ORDER *) 0;
+    if (!simple_group && !procedure && !(test_flags & TEST_NO_KEY_GROUP))
+      tmp_group= group_list;
+
+    tmp_table_param.hidden_field_count= 
+      all_fields.elements - fields_list.elements;
+
+    distinct= select_distinct && !group_list;
+    keep_row_order= false;
+    if (create_postjoin_aggr_table(curr_tab,
+                                   &all_fields, tmp_group, 
+                                   group_list && simple_group,
+                                   distinct, keep_row_order))
+      DBUG_RETURN(true);
+    exec_tmp_table= curr_tab->table;
+
+    if (exec_tmp_table->distinct)
+      optimize_distinct();
+
+    /*
+      We don't have to store rows in the temp table that don't match HAVING if:
+      - we are sorting the table and writing complete group rows to the
+        temp table.
+      - We are using DISTINCT without resolving the distinct as a GROUP BY
+        on all columns.
+
+      If having is not handled here, it will be checked before the row
+      is sent to the client.
+    */
+    if (having &&
+        (sort_and_group || (exec_tmp_table->distinct && !group_list)))
     {
-      need_distinct= FALSE;
-      tmp_table_param.precomputed_group_by= FALSE;
+      // Attach HAVING to tmp table's condition
+      curr_tab->having= having;
+      having= NULL; // Already done
     }
-  }
 
-  error= 0;
+   /* Change sum_fields reference to calculated fields in tmp_table */
+    items1= ref_ptr_array_slice(2);
+    if (sort_and_group || curr_tab->table->group ||
+        tmp_table_param.precomputed_group_by)
+    {
+      if (change_to_use_tmp_fields(thd, items1,
+                                   tmp_fields_list1, tmp_all_fields1,
+                                   fields_list.elements, all_fields))
+        DBUG_RETURN(true);
+    }
+    else
+    {
+      if (change_refs_to_tmp_fields(thd, items1,
+                                    tmp_fields_list1, tmp_all_fields1,
+                                    fields_list.elements, all_fields))
+        DBUG_RETURN(true);
+    }
+    curr_all_fields= &tmp_all_fields1;
+    curr_fields_list= &tmp_fields_list1;
+    // Need to set them now for correct group_fields setup, reset at the end.
+    set_items_ref_array(items1);
+    curr_tab->ref_array= &items1;
+    curr_tab->all_fields= &tmp_all_fields1;
+    curr_tab->fields= &tmp_fields_list1;
+    set_postjoin_aggr_write_func(curr_tab);
 
-  tmp_having= having;
-  if (select_options & SELECT_DESCRIBE)
-    goto derived_exit;
-  having= 0;
+    // psergey-todo: this is probably an incorrect place:
+    if (select_lex->window_funcs.elements)
+    {
+      curr_tab->window_funcs= new Window_funcs_computation;
+      if (curr_tab->window_funcs->setup(thd, &select_lex->window_funcs))
+        DBUG_RETURN(true);
+    }
 
-  DBUG_RETURN(0);
+    tmp_table_param.func_count= 0;
+    tmp_table_param.field_count+= tmp_table_param.func_count;
+    if (sort_and_group || curr_tab->table->group)
+    {
+      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
+      tmp_table_param.sum_func_count= 0;
+    }
 
-setup_subq_exit:
-  /* Choose an execution strategy for this JOIN. */
-  if (!tables_list || !table_count)
-    choose_tableless_subquery_plan();
-  /*
-    Even with zero matching rows, subqueries in the HAVING clause may
-    need to be evaluated if there are aggregate functions in the query.
-  */
-  if (optimize_unflattened_subqueries())
-    DBUG_RETURN(1);
-  error= 0;
+    if (exec_tmp_table->group)
+    {						// Already grouped
+      if (!order && !no_order && !skip_sort_order)
+        order= group_list;  /* order by group */
+      group_list= NULL;
+    }
 
-derived_exit:
+    /*
+      If we have different sort & group then we must sort the data by group
+      and copy it to another tmp table
+      This code is also used if we are using distinct something
+      we haven't been able to store in the temporary table yet
+      like SEC_TO_TIME(SUM(...)).
+    */
+    if ((group_list &&
+         (!test_if_subpart(group_list, order) || select_distinct)) ||
+        (select_distinct && tmp_table_param.using_outer_summary_function))
+    {					/* Must copy to another table */
+      DBUG_PRINT("info",("Creating group table"));
+      
+      calc_group_buffer(this, group_list);
+      count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
+                        select_distinct && !group_list);
+      tmp_table_param.hidden_field_count= 
+        tmp_all_fields1.elements - tmp_fields_list1.elements;
+      
+      if (!exec_tmp_table->group && !exec_tmp_table->distinct)
+      {
+        // The 1st tmp table was materializing the join result
+        materialize_join= true;
+      }
+      curr_tab++;
+      aggr_tables++;
+      bzero(curr_tab, sizeof(JOIN_TAB));
+      curr_tab->ref.key= -1;
 
-  select_lex->mark_const_derived(zero_result_cause);
-  DBUG_RETURN(0);
-}
+      /* group data to new table */
+      /*
+        If the access method is loose index scan then all MIN/MAX
+        functions are precomputed, and should be treated as regular
+        functions. See extended comment above.
+      */
+      if (join_tab->is_using_loose_index_scan())
+        tmp_table_param.precomputed_group_by= TRUE;
 
+      tmp_table_param.hidden_field_count= 
+        curr_all_fields->elements - curr_fields_list->elements;
+      ORDER *dummy= NULL; //TODO can use table->group here also
 
-/**
-  Create and initialize objects neeed for the execution of a query plan.
-  Evaluate constant expressions not evaluated during optimization.
-*/
+      if (create_postjoin_aggr_table(curr_tab,
+                                     curr_all_fields, dummy, true,
+                                     distinct, keep_row_order))
+	DBUG_RETURN(true);
 
-int JOIN::init_execution()
-{
-  DBUG_ENTER("JOIN::init_execution");
+      if (group_list)
+      {
+        if (!only_const_tables())        // No need to sort a single row
+        {
+          if (add_sorting_to_table(curr_tab - 1, group_list))
+            DBUG_RETURN(true);
+        }
 
-  DBUG_ASSERT(optimized);
-  DBUG_ASSERT(!(select_options & SELECT_DESCRIBE));
-  initialized= true;
+        if (make_group_fields(this, this))
+          DBUG_RETURN(true);
+      }
 
-  /*
-    Enable LIMIT ROWS EXAMINED during query execution if:
-    (1) This JOIN is the outermost query (not a subquery or derived table)
-        This ensures that the limit is enabled when actual execution begins,
-        and not if a subquery is evaluated during optimization of the outer
-        query.
-    (2) This JOIN is not the result of a UNION. In this case do not apply the
-        limit in order to produce the partial query result stored in the
-        UNION temp table.
-  */
-  if (!select_lex->outer_select() &&                            // (1)
-      select_lex != select_lex->master_unit()->fake_select_lex) // (2)
-    thd->lex->set_limit_rows_examined();
+      // Setup sum funcs only when necessary, otherwise we might break info
+      // for the first table
+      if (group_list || tmp_table_param.sum_func_count)
+      {
+        if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
+          DBUG_RETURN(true);
+        if (prepare_sum_aggregators(sum_funcs,
+                                    !join_tab->is_using_agg_loose_index_scan()))
+          DBUG_RETURN(true);
+        group_list= NULL;
+        if (setup_sum_funcs(thd, sum_funcs))
+          DBUG_RETURN(true);
+      }
+      // No sum funcs anymore
+      DBUG_ASSERT(items2.is_null());
 
-  /* Create a tmp table if distinct or if the sort is too complicated */
-  if (need_tmp && !exec_tmp_table1)
-  {
-    DBUG_PRINT("info",("Creating tmp table"));
-    THD_STAGE_INFO(thd, stage_creating_tmp_table);
+      items2= ref_ptr_array_slice(3);
+      if (change_to_use_tmp_fields(thd, items2,
+                                   tmp_fields_list2, tmp_all_fields2, 
+                                   fields_list.elements, tmp_all_fields1))
+        DBUG_RETURN(true);
 
-    init_items_ref_array();
+      curr_fields_list= &tmp_fields_list2;
+      curr_all_fields= &tmp_all_fields2;
+      set_items_ref_array(items2);
+      curr_tab->ref_array= &items2;
+      curr_tab->all_fields= &tmp_all_fields2;
+      curr_tab->fields= &tmp_fields_list2;
+      set_postjoin_aggr_write_func(curr_tab);
 
-    tmp_table_param.hidden_field_count= (all_fields.elements -
-					 fields_list.elements);
-    ORDER *tmp_group= ((!simple_group && !procedure &&
-                        !(test_flags & TEST_NO_KEY_GROUP)) ? group_list :
-                                                             (ORDER*) 0);
-    /*
-      Pushing LIMIT to the temporary table creation is not applicable
-      when there is ORDER BY or GROUP BY or there is no GROUP BY, but
-      there are aggregate functions, because in all these cases we need
-      all result rows.
-    */
-    ha_rows tmp_rows_limit= ((order == 0 || skip_sort_order) &&
-                             !tmp_group &&
-                             !thd->lex->current_select->with_sum_func) ?
-                            select_limit : HA_POS_ERROR;
-
-    if (!(exec_tmp_table1=
-	  create_tmp_table(thd, &tmp_table_param, all_fields,
-                           tmp_group, group_list ? 0 : select_distinct,
-			   group_list && simple_group,
-			   select_options, tmp_rows_limit, "")))
-      DBUG_RETURN(1);
-    explain->ops_tracker.report_tmp_table(exec_tmp_table1);
-    /*
-      We don't have to store rows in temp table that doesn't match HAVING if:
-      - we are sorting the table and writing complete group rows to the
-        temp table.
-      - We are using DISTINCT without resolving the distinct as a GROUP BY
-        on all columns.
-      
-      If having is not handled here, it will be checked before the row
-      is sent to the client.
-    */    
-    if (tmp_having && 
-	(sort_and_group || (exec_tmp_table1->distinct && !group_list)))
-      having= tmp_having;
-
-    /* if group or order on first table, sort first */
-    if (group_list && simple_group)
-    {
-      DBUG_PRINT("info",("Sorting for group"));
-      THD_STAGE_INFO(thd, stage_sorting_for_group);
-      if (create_sort_index(thd, this, group_list,
-			    HA_POS_ERROR, HA_POS_ERROR, FALSE) ||
-	  alloc_group_fields(this, group_list) ||
-          make_sum_func_list(all_fields, fields_list, 1) ||
-          prepare_sum_aggregators(sum_funcs, need_distinct) ||
-          setup_sum_funcs(thd, sum_funcs))
+      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
+      tmp_table_param.sum_func_count= 0;
+    }
+    if (curr_tab->table->distinct)
+      select_distinct= false;               /* Each row is unique */
+
+    if (select_distinct && !group_list)
+    {
+      if (having)
       {
-        DBUG_RETURN(1);
+        curr_tab->having= having;
+        having->update_used_tables();
       }
-      group_list=0;
+      curr_tab->distinct= true;
+      having= NULL;
+      select_distinct= false;
+    }
+    /* Clean tmp_table_param for the next tmp table. */
+    tmp_table_param.field_count= tmp_table_param.sum_func_count=
+      tmp_table_param.func_count= 0;
+
+    tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+    first_record= sort_and_group=0;
+
+    if (!group_optimized_away)
+    {
+      group= false;
     }
     else
     {
-      if (make_sum_func_list(all_fields, fields_list, 0) ||
-          prepare_sum_aggregators(sum_funcs, need_distinct) ||
-          setup_sum_funcs(thd, sum_funcs))
+      /*
+        If grouping has been optimized away, a temporary table is
+        normally not needed unless we're explicitly requested to create
+        one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).
+
+        In this case (grouping was optimized away), temp_table was
+        created without a grouping expression and JOIN::exec() will not
+        perform the necessary grouping (by the use of end_send_group()
+        or end_write_group()) if JOIN::group is set to false.
+      */
+      // the temporary table was explicitly requested
+      DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT));
+      // the temporary table does not have a grouping expression
+      DBUG_ASSERT(!curr_tab->table->group); 
+    }
+    calc_group_buffer(this, group_list);
+    count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false);
+  }
+
+  if (group || implicit_grouping || tmp_table_param.sum_func_count)
+  {
+    if (make_group_fields(this, this))
+      DBUG_RETURN(true);
+
+    DBUG_ASSERT(items3.is_null());
+
+    if (items0.is_null())
+      init_items_ref_array();
+    items3= ref_ptr_array_slice(4);
+    setup_copy_fields(thd, &tmp_table_param,
+                      items3, tmp_fields_list3, tmp_all_fields3,
+                      curr_fields_list->elements, *curr_all_fields);
+
+    curr_fields_list= &tmp_fields_list3;
+    curr_all_fields= &tmp_all_fields3;
+    set_items_ref_array(items3);
+    if (join_tab)
+    {
+      JOIN_TAB *last_tab= join_tab + top_join_tab_count + aggr_tables - 1;
+      // Set grouped fields on the last table
+      last_tab->ref_array= &items3;
+      last_tab->all_fields= &tmp_all_fields3;
+      last_tab->fields= &tmp_fields_list3;
+    }
+    if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
+      DBUG_RETURN(true);
+    if (prepare_sum_aggregators(sum_funcs,
+                                !join_tab ||
+                                !join_tab-> is_using_agg_loose_index_scan()))
+      DBUG_RETURN(true);
+    if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error)
+      DBUG_RETURN(true);
+  }
+  if (group_list || order)
+  {
+    DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
+    THD_STAGE_INFO(thd, stage_sorting_result);
+    /* If we have already done the group, add HAVING to sorted table */
+    if (tmp_having && !group_list && !sort_and_group)
+    {
+      // Some tables may have been const
+      tmp_having->update_used_tables();
+      table_map used_tables= (const_table_map | curr_tab->table->map);
+
+      Item* sort_table_cond= make_cond_for_table(thd, tmp_having, used_tables,
+                                                 (table_map) 0, false,
+                                                 false, false);
+      if (sort_table_cond)
       {
-        DBUG_RETURN(1);
+	if (!curr_tab->select)
+	  if (!(curr_tab->select= new SQL_SELECT))
+	    DBUG_RETURN(true);
+	if (!curr_tab->select->cond)
+	  curr_tab->select->cond= sort_table_cond;
+	else
+	{
+	  if (!(curr_tab->select->cond=
+		new (thd->mem_root) Item_cond_and(thd, curr_tab->select->cond,
+				                      sort_table_cond)))
+	    DBUG_RETURN(true);
+	}
+        if (curr_tab->pre_idx_push_select_cond)
+	{
+          if (sort_table_cond->type() == Item::COND_ITEM)
+            sort_table_cond= sort_table_cond->copy_andor_structure(thd);           
+          if (!(curr_tab->pre_idx_push_select_cond= 
+                new (thd->mem_root) Item_cond_and(thd,
+                                                  curr_tab->pre_idx_push_select_cond,
+                                                  sort_table_cond)))
+            DBUG_RETURN(true);            
+        }
+        if (curr_tab->select->cond && !curr_tab->select->cond->fixed)
+	  curr_tab->select->cond->fix_fields(thd, 0);
+        if (curr_tab->pre_idx_push_select_cond &&
+            !curr_tab->pre_idx_push_select_cond->fixed)
+          curr_tab->pre_idx_push_select_cond->fix_fields(thd, 0);
+        curr_tab->select->pre_idx_push_select_cond=
+          curr_tab->pre_idx_push_select_cond;
+        curr_tab->set_select_cond(curr_tab->select->cond, __LINE__);
+        curr_tab->select_cond->top_level_item();
+	DBUG_EXECUTE("where",print_where(curr_tab->select->cond,
+					 "select and having",
+                                         QT_ORDINARY););
+
+        having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0,
+                                    ~used_tables, false, false, false);
+        DBUG_EXECUTE("where",
+                     print_where(having, "having after sort", QT_ORDINARY););
       }
+    }
 
-      if (!group_list && ! exec_tmp_table1->distinct && order && simple_order)
+    if (group)
+      select_limit= HA_POS_ERROR;
+    else if (!need_tmp)
+    {
+      /*
+        We can abort sorting after thd->select_limit rows if there are no
+        filter conditions for any tables after the sorted one.
+        Filter conditions come in several forms:
+         1. as a condition item attached to the join_tab, or
+         2. as a keyuse attached to the join_tab (ref access).
+      */
+      for (uint i= const_tables + 1; i < top_join_tab_count; i++)
       {
-        DBUG_PRINT("info",("Sorting for order"));
-        THD_STAGE_INFO(thd, stage_sorting_for_order);
-        if (create_sort_index(thd, this, order,
-                              HA_POS_ERROR, HA_POS_ERROR, TRUE))
+        JOIN_TAB *const tab= join_tab + i;
+        if (tab->select_cond ||                                // 1
+            (tab->keyuse && !tab->first_inner))                // 2
         {
-          DBUG_RETURN(1);
+          /* We have to sort all rows */
+          select_limit= HA_POS_ERROR;
+          break;
         }
-        order=0;
       }
     }
-    
     /*
-      Optimize distinct when used on some of the tables
-      SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b
-      In this case we can stop scanning t2 when we have found one t1.a
+      Here we add sorting stage for ORDER BY/GROUP BY clause, if the
+      optimiser chose FILESORT to be faster than INDEX SCAN or there is
+      no suitable index present.
+      OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
     */
+    DBUG_PRINT("info",("Sorting for order by/group by"));
+    ORDER *order_arg= group_list ?  group_list : order;
+    if (join_tab &&
+        ordered_index_usage !=
+        (group_list ? ordered_index_group_by : ordered_index_order_by) &&
+        curr_tab->type != JT_CONST &&
+        curr_tab->type != JT_EQ_REF) // Don't sort 1 row
+    {
+      // Sort either first non-const table or the last tmp table
+      JOIN_TAB *sort_tab= curr_tab;
+
+      if (add_sorting_to_table(sort_tab, order_arg))
+        DBUG_RETURN(true);
+      /*
+        filesort_limit:	 Return only this many rows from filesort().
+        We can use select_limit_cnt only if we have no group_by and 1 table.
+        This allows us to use Bounded_queue for queries like:
+          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
+        select_limit == HA_POS_ERROR (we need a full table scan)
+        unit->select_limit_cnt == 1 (we only need one row in the result set)
+      */
+      sort_tab->filesort->limit=
+        (has_group_by || (join_tab + table_count > curr_tab + 1)) ?
+         select_limit : unit->select_limit_cnt;
+    }
+    if (!only_const_tables() &&
+        !join_tab[const_tables].filesort &&
+        !(select_options & SELECT_DESCRIBE))
+    {
+      /*
+        If no filesort is attached to the first non-const table then we are
+        using an INDEX SCAN. Thus we should not remove the sorted
+        attribute on the INDEX SCAN.
+      */
+      skip_sort_order= true;
+    }
+  }
+  fields= curr_fields_list;
+  // Reset before execution
+  set_items_ref_array(items0);
+  if (join_tab)
+    join_tab[top_join_tab_count + aggr_tables - 1].next_select=
+      setup_end_select_func(this, NULL);
+  group= has_group_by;
+
+  DBUG_RETURN(false);
+}
+
 
-    if (exec_tmp_table1->distinct)
+
+bool
+JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
+                                 ORDER *table_group,
+                                 bool save_sum_fields,
+                                 bool distinct,
+                                 bool keep_row_order)
+{
+  DBUG_ENTER("JOIN::create_postjoin_aggr_table");
+  THD_STAGE_INFO(thd, stage_creating_tmp_table);
+
+  /*
+    Pushing LIMIT to the post-join temporary table creation is not applicable
+    when there is an ORDER BY that still requires sorting, a GROUP BY, or
+    aggregate functions without GROUP BY, because in all these cases we need
+    all result rows.
+  */
+  ha_rows table_rows_limit= ((order == NULL || skip_sort_order) &&
+                              !table_group &&
+                              !select_lex->with_sum_func) ?
+                              select_limit : HA_POS_ERROR;
+
+  tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param);
+  tab->tmp_table_param->skip_create_table= true;
+  TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *table_fields,
+                                 table_group, distinct,
+                                 save_sum_fields, select_options, table_rows_limit, 
+                                 "", true, keep_row_order);
+  if (!table)
+    DBUG_RETURN(true);
+  tmp_table_param.using_outer_summary_function=
+    tab->tmp_table_param->using_outer_summary_function;
+  tab->join= this;
+  DBUG_ASSERT(tab > tab->join->join_tab);
+  (tab - 1)->next_select= sub_select_postjoin_aggr;
+  tab->aggr= new (thd->mem_root) AGGR_OP(tab);
+  if (!tab->aggr)
+    goto err;
+  tab->table= table;
+  table->reginfo.join_tab= tab;
+
+  /* if group or order on first table, sort first */
+  if (group_list && simple_group)
+  {
+    DBUG_PRINT("info",("Sorting for group"));
+    THD_STAGE_INFO(thd, stage_sorting_for_group);
+
+    if (ordered_index_usage != ordered_index_group_by &&
+        (join_tab + const_tables)->type != JT_CONST && // Don't sort 1 row
+        add_sorting_to_table(join_tab + const_tables, group_list))
+      goto err;
+
+    if (alloc_group_fields(this, group_list))
+      goto err;
+    if (make_sum_func_list(all_fields, fields_list, true))
+      goto err;
+    if (prepare_sum_aggregators(sum_funcs,
+                                !join_tab->is_using_agg_loose_index_scan()))
+      goto err;
+    if (setup_sum_funcs(thd, sum_funcs))
+      goto err;
+    group_list= NULL;
+  }
+  else
+  {
+    if (make_sum_func_list(all_fields, fields_list, false))
+      goto err;
+    if (prepare_sum_aggregators(sum_funcs,
+                                !join_tab->is_using_agg_loose_index_scan()))
+      goto err;
+    if (setup_sum_funcs(thd, sum_funcs))
+      goto err;
+
+    if (!group_list && !table->distinct && order && simple_order)
     {
-      table_map used_tables= select_list_used_tables;
-      JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1;
-      do
-      {
-	if (used_tables & last_join_tab->table->map ||
-            last_join_tab->use_join_cache)
-	  break;
-	last_join_tab->shortcut_for_distinct= true;
-      } while (last_join_tab-- != join_tab);
-      /* Optimize "select distinct b from t1 order by key_part_1 limit #" */
-      if (order && skip_sort_order)
-      {
- 	/* Should always succeed */
-	if (test_if_skip_sort_order(&join_tab[const_tables],
-				    order, unit->select_limit_cnt, 0, 
-                                    &join_tab[const_tables].table->
-                                      keys_in_use_for_order_by))
-	  order=0;
-        join_tab[const_tables].update_explain_data(const_tables);
-      }
+      DBUG_PRINT("info",("Sorting for order"));
+      THD_STAGE_INFO(thd, stage_sorting_for_order);
+
+      if (ordered_index_usage != ordered_index_order_by &&
+          add_sorting_to_table(join_tab + const_tables, order))
+        goto err;
+      order= NULL;
     }
+  }
+
+  DBUG_RETURN(false);
 
-    /* If this join belongs to an uncacheable query save the original join */
-    if (select_lex->uncacheable && init_save_join_tab())
-      DBUG_RETURN(-1);                         /* purecov: inspected */
+err:
+  if (table != NULL)
+    free_tmp_table(thd, table);
+  DBUG_RETURN(true);
+}
+
+
+void
+JOIN::optimize_distinct()
+{
+  for (JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; ;)
+  {
+    if (select_lex->select_list_tables & last_join_tab->table->map ||
+        last_join_tab->use_join_cache)
+      break;
+    last_join_tab->shortcut_for_distinct= true;
+    if (last_join_tab == join_tab)
+      break;
+    --last_join_tab;
   }
 
-  DBUG_RETURN(0);
+  /* Optimize "select distinct b from t1 order by key_part_1 limit #" */
+  if (order && skip_sort_order)
+  {
+    /* Should already have been optimized away */
+    DBUG_ASSERT(ordered_index_usage == ordered_index_order_by);
+    if (ordered_index_usage == ordered_index_order_by)
+    {
+      order= NULL;
+    }
+  }
 }
 
 
 /**
+  @brief Add Filesort object to the given table to sort it with filesort
+
+  @param tab   the JOIN_TAB object to attach created Filesort object to
+  @param order List of expressions to sort the table by
+
+  @note This function moves tab->select, if any, to filesort->select
+
+  @return false on success, true on OOM
+*/
+
+bool
+JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
+{
+  tab->filesort= new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->select);
+  if (!tab->filesort)
+    return true;
+  /*
+    Select was moved to filesort->select to force join_init_read_record to use
+    sorted result instead of reading table through select.
+  */
+  if (tab->select)
+  {
+    tab->select= NULL;
+    tab->set_select_cond(NULL, __LINE__);
+  }
+  tab->read_first_record= join_init_read_record;
+  return false;
+}
+
+
+
+
+/**
   Setup expression caches for subqueries that need them
 
   @details
@@ -2291,17 +2916,6 @@ bool JOIN::setup_subquery_caches()
 }
 
 
-/**
-  Restore values in temporary join.
-*/
-void JOIN::restore_tmp()
-{
-  DBUG_PRINT("info", ("restore_tmp this %p tmp_join %p", this, tmp_join));
-  DBUG_ASSERT(tmp_join != this);
-  memcpy(tmp_join, this, (size_t) sizeof(JOIN));
-}
-
-
 /*
   Shrink join buffers used for preceding tables to reduce the occupied space
 
@@ -2366,25 +2980,29 @@ JOIN::reinit()
   unit->offset_limit_cnt= (ha_rows)(select_lex->offset_limit ?
                                     select_lex->offset_limit->val_uint() : 0);
 
-  first_record= 0;
+  first_record= false;
+  group_sent= false;
   cleaned= false;
 
-  if (exec_tmp_table1)
-  {
-    exec_tmp_table1->file->extra(HA_EXTRA_RESET_STATE);
-    exec_tmp_table1->file->ha_delete_all_rows();
-  }
-  if (exec_tmp_table2)
+  if (aggr_tables)
   {
-    exec_tmp_table2->file->extra(HA_EXTRA_RESET_STATE);
-    exec_tmp_table2->file->ha_delete_all_rows();
+    JOIN_TAB *curr_tab= join_tab + top_join_tab_count;
+    JOIN_TAB *end_tab= curr_tab + aggr_tables;
+    for ( ; curr_tab < end_tab; curr_tab++)
+    {
+      TABLE *tmp_table= curr_tab->table;
+      if (!tmp_table->is_created())
+        continue;
+      tmp_table->file->extra(HA_EXTRA_RESET_STATE);
+      tmp_table->file->ha_delete_all_rows();
+    }
   }
   clear_sj_tmp_tables(this);
-  if (items0)
+  if (current_ref_ptrs != items0)
+  {
     set_items_ref_array(items0);
-
-  if (join_tab_save)
-    memcpy(join_tab, join_tab_save, sizeof(JOIN_TAB) * table_count);
+    set_group_rpa= false;
+  }
 
   /* need to reset ref access state (see join_read_key) */
   if (join_tab)
@@ -2397,9 +3015,6 @@ JOIN::reinit()
     }
   }
 
-  if (tmp_join)
-    restore_tmp();
-
   /* Reset of sum functions */
   if (sum_funcs)
   {
@@ -2424,38 +3039,40 @@ JOIN::reinit()
   DBUG_RETURN(0);
 }
 
+
 /**
-   @brief Save the original join layout
-      
-   @details Saves the original join layout so it can be reused in 
-   re-execution and for EXPLAIN.
-             
-   @return Operation status
-   @retval 0      success.
-   @retval 1      error occurred.
+  Prepare join result.
+
+  @details Prepare join result prior to join execution or describing.
+  Instantiate derived tables and get schema tables result if necessary.
+
+  @return
+    TRUE  An error during derived or schema tables instantiation.
+    FALSE Ok
 */
 
-bool
-JOIN::init_save_join_tab()
+bool JOIN::prepare_result(List<Item> **columns_list)
 {
-  if (!(tmp_join= (JOIN*)thd->alloc(sizeof(JOIN))))
-    return 1;                                  /* purecov: inspected */
-  error= 0;				       // Ensure that tmp_join.error= 0
-  restore_tmp();
-  return 0;
-}
+  DBUG_ENTER("JOIN::prepare_result");
 
+  error= 0;
+  /* Create result tables for materialized views. */
+  if (!zero_result_cause &&
+      select_lex->handle_derived(thd->lex, DT_CREATE))
+    goto err;
 
-bool
-JOIN::save_join_tab()
-{
-  if (!join_tab_save && select_lex->master_unit()->uncacheable)
-  {
-    if (!(join_tab_save= (JOIN_TAB*)thd->memdup((uchar*) join_tab,
-						sizeof(JOIN_TAB) * table_count)))
-      return 1;
-  }
-  return 0;
+  if (result->prepare2())
+    goto err;
+
+  if ((select_lex->options & OPTION_SCHEMA_TABLE) &&
+      get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
+    goto err;
+
+  DBUG_RETURN(FALSE);
+
+err:
+  error= 1;
+  DBUG_RETURN(TRUE);
 }
 
 
@@ -2496,6 +3113,14 @@ void JOIN::save_explain_data(Explain_query *output, bool can_overwrite,
     Explain_union *eu= output->get_union(nr);
     explain= &eu->fake_select_lex_explain;
     join_tab[0].tracker= eu->get_fake_select_lex_tracker();
+    for (int i=0 ; i < top_join_tab_count + aggr_tables; i++)
+    {
+      if (join_tab[i].filesort)
+      {
+        join_tab[i].filesort->tracker= 
+          new Filesort_tracker(thd->lex->analyze_stmt);
+      }
+    }
   }
 }
 
@@ -2509,7 +3134,6 @@ void JOIN::exec()
                         dbug_serve_apcs(thd, 1);
                  );
   ANALYZE_START_TRACKING(&explain->time_tracker);
-  explain->ops_tracker.report_join_start();
   exec_inner();
   ANALYZE_STOP_TRACKING(&explain->time_tracker);
 
@@ -2522,29 +3146,26 @@ void JOIN::exec()
 }
 
 
-/**
-  Exec select.
-
-  @todo
-    Note, that create_sort_index calls test_if_skip_sort_order and may
-    finally replace sorting with index scan if there is a LIMIT clause in
-    the query.  It's never shown in EXPLAIN!
-
-  @todo
-    When can we have here thd->net.report_error not zero?
-*/
-
 void JOIN::exec_inner()
 {
   List<Item> *columns_list= &fields_list;
-  int      tmp_error;
+  DBUG_ENTER("JOIN::exec_inner");
 
-  DBUG_ENTER("JOIN::exec");
+  THD_STAGE_INFO(thd, stage_executing);
 
-  const bool has_group_by= this->group;
+  /*
+    Enable LIMIT ROWS EXAMINED during query execution if:
+    (1) This JOIN is the outermost query (not a subquery or derived table).
+        This ensures that the limit is enabled when actual execution begins, and
+        not if a subquery is evaluated during optimization of the outer query.
+    (2) This JOIN is not the result of a UNION. In this case do not apply the
+        limit in order to produce the partial query result stored in the
+        UNION temp table.
+  */
+  if (!select_lex->outer_select() &&                            // (1)
+      select_lex != select_lex->master_unit()->fake_select_lex) // (2)
+    thd->lex->set_limit_rows_examined();
 
-  THD_STAGE_INFO(thd, stage_executing);
-  error= 0;
   if (procedure)
   {
     procedure_fields_list= fields_list;
@@ -2565,13 +3186,16 @@ void JOIN::exec_inner()
     if (select_options & SELECT_DESCRIBE)
       select_describe(this, FALSE, FALSE, FALSE,
 		      (zero_result_cause?zero_result_cause:"No tables used"));
+
     else
     {
       if (result->send_result_set_metadata(*columns_list,
-                                           Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
+                                           Protocol::SEND_NUM_ROWS |
+                                           Protocol::SEND_EOF))
       {
         DBUG_VOID_RETURN;
       }
+
       /*
         We have to test for 'conds' here as the WHERE may not be constant
         even if we don't have any tables for prepared statements or if
@@ -2638,7 +3262,7 @@ void JOIN::exec_inner()
 			    having ? having : tmp_having, all_fields);
     DBUG_VOID_RETURN;
   }
-
+  
   /*
     Evaluate all constant expressions with subqueries in the
     ORDER/GROUP clauses to make sure that all subqueries return a
@@ -2667,42 +3291,6 @@ void JOIN::exec_inner()
 
   if (select_options & SELECT_DESCRIBE)
   {
-    /*
-      Check if we managed to optimize ORDER BY away and don't use temporary
-      table to resolve ORDER BY: in that case, we only may need to do
-      filesort for GROUP BY.
-    */
-    if (!order && !no_order && (!skip_sort_order || !need_tmp))
-    {
-      /*
-	Reset 'order' to 'group_list' and reinit variables describing
-	'order'
-      */
-      order= group_list;
-      simple_order= simple_group;
-      skip_sort_order= 0;
-    }
-    if (order && join_tab)
-    {
-      bool made_call= false;
-      SQL_SELECT *tmp_select= join_tab[const_tables].select;
-      if ((order != group_list ||
-           !(select_options & SELECT_BIG_RESULT) ||
-           (tmp_select && tmp_select->quick &&
-            tmp_select->quick->get_type() ==
-            QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) &&
-          (const_tables == table_count ||
-           ((simple_order || skip_sort_order) &&
-            (made_call=true) &&
-            test_if_skip_sort_order(&join_tab[const_tables], order,
-                                    select_limit, 0, 
-                                    &join_tab[const_tables].table->
-                                    keys_in_use_for_query))))
-        order=0;
-      if (made_call)
-        join_tab[const_tables].update_explain_data(const_tables);
-    }
-    having= tmp_having;
     select_describe(this, need_tmp,
 		    order != 0 && !skip_sort_order,
 		    select_distinct,
@@ -2715,537 +3303,31 @@ void JOIN::exec_inner()
     select_lex->mark_const_derived(zero_result_cause);
   }
 
-  if (!initialized && init_execution())
-    DBUG_VOID_RETURN;
-
-  JOIN *curr_join= this;
-  List<Item> *curr_all_fields= &all_fields;
-  List<Item> *curr_fields_list= &fields_list;
-  TABLE *curr_tmp_table= 0;
-  /*
-    curr_join->join_free() will call JOIN::cleanup(full=TRUE). It will not 
-    be safe to call update_used_tables() after that.
-  */
-  if (curr_join->tmp_having)
-    curr_join->tmp_having->update_used_tables();
-
   /*
     Initialize examined rows here because the values from all join parts
     must be accumulated in examined_row_count. Hence every join
     iteration must count from zero.
   */
-  curr_join->join_examined_rows= 0;
-
-  curr_join->do_select_call_count= 0;
+  join_examined_rows= 0;
 
-  /* Create a tmp table if distinct or if the sort is too complicated */
-  if (need_tmp)
-  {
-    if (tmp_join)
-    {
-      /*
-        We are in a non cacheable sub query. Get the saved join structure
-        after optimization.
-        (curr_join may have been modified during last exection and we need
-        to reset it)
-      */
-      curr_join= tmp_join;
-    }
-    curr_tmp_table= exec_tmp_table1;
-
-    /* Copy data to the temporary table */
-    THD_STAGE_INFO(thd, stage_copying_to_tmp_table);
-    DBUG_PRINT("info", ("%s", thd->proc_info));
-    if (!curr_join->sort_and_group &&
-        curr_join->const_tables != curr_join->table_count)
-    {
-      JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
-      first_tab->sorted= MY_TEST(first_tab->loosescan_match_tab);
-    }
-
-    Procedure *save_proc= curr_join->procedure;
-    tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0);
-    curr_join->procedure= save_proc;
-    if (tmp_error)
-    {
-      error= tmp_error;
-      DBUG_VOID_RETURN;
-    }
-    curr_tmp_table->file->info(HA_STATUS_VARIABLE);
-    
-    if (curr_join->having)
-      curr_join->having= curr_join->tmp_having= 0; // Allready done
-    
-    /* Change sum_fields reference to calculated fields in tmp_table */
-#ifdef HAVE_valgrind
-    if (curr_join != this)
-#endif
-      curr_join->all_fields= *curr_all_fields;
-    if (!items1)
-    {
-      items1= items0 + all_fields.elements;
-      if (sort_and_group || curr_tmp_table->group ||
-          tmp_table_param.precomputed_group_by)
-      {
-	if (change_to_use_tmp_fields(thd, items1,
-				     tmp_fields_list1, tmp_all_fields1,
-				     fields_list.elements, all_fields))
-	  DBUG_VOID_RETURN;
-      }
-      else
-      {
-	if (change_refs_to_tmp_fields(thd, items1,
-				      tmp_fields_list1, tmp_all_fields1,
-				      fields_list.elements, all_fields))
-	  DBUG_VOID_RETURN;
-      }
-#ifdef HAVE_valgrind
-      if (curr_join != this)
-#endif
-      {
-        curr_join->tmp_all_fields1= tmp_all_fields1;
-        curr_join->tmp_fields_list1= tmp_fields_list1;
-      }
-      curr_join->items1= items1;
-    }
-    curr_all_fields= &tmp_all_fields1;
-    curr_fields_list= &tmp_fields_list1;
-    curr_join->set_items_ref_array(items1);
-    
-    if (sort_and_group || curr_tmp_table->group)
-    {
-      curr_join->tmp_table_param.field_count+= 
-	curr_join->tmp_table_param.sum_func_count+
-	curr_join->tmp_table_param.func_count;
-      curr_join->tmp_table_param.sum_func_count= 
-	curr_join->tmp_table_param.func_count= 0;
-    }
-    else
-    {
-      curr_join->tmp_table_param.field_count+= 
-	curr_join->tmp_table_param.func_count;
-      curr_join->tmp_table_param.func_count= 0;
-    }
-    
-    // procedure can't be used inside subselect => we do nothing special for it
-    if (procedure)
-      procedure->update_refs();
-    
-    if (curr_tmp_table->group)
-    {						// Already grouped
-      if (!curr_join->order && !curr_join->no_order && !skip_sort_order)
-	curr_join->order= curr_join->group_list;  /* order by group */
-      curr_join->group_list= 0;
-    }
-    
-    /*
-      If we have different sort & group then we must sort the data by group
-      and copy it to another tmp table
-      This code is also used if we are using distinct something
-      we haven't been able to store in the temporary table yet
-      like SEC_TO_TIME(SUM(...)).
-    */
-
-    if ((curr_join->group_list && (!test_if_subpart(curr_join->group_list,
-                                                    curr_join->order) || 
-                                   curr_join->select_distinct)) ||
-	(curr_join->select_distinct &&
-	 curr_join->tmp_table_param.using_indirect_summary_function))
-    {					/* Must copy to another table */
-      DBUG_PRINT("info",("Creating group table"));
-      
-      /* Free first data from old join */
-      
-      /*
-        psergey-todo: this is the place of pre-mature JOIN::free call.
-      */
-      curr_join->join_free();
-      if (curr_join->make_simple_join(this, curr_tmp_table))
-	DBUG_VOID_RETURN;
-      calc_group_buffer(curr_join, group_list);
-      count_field_types(select_lex, &curr_join->tmp_table_param,
-			curr_join->tmp_all_fields1,
-			curr_join->select_distinct && !curr_join->group_list);
-      curr_join->tmp_table_param.hidden_field_count= 
-	(curr_join->tmp_all_fields1.elements-
-	 curr_join->tmp_fields_list1.elements);
-      
-      if (exec_tmp_table2)
-	curr_tmp_table= exec_tmp_table2;
-      else
-      {
-	/* group data to new table */
-
-        /*
-          If the access method is loose index scan then all MIN/MAX
-          functions are precomputed, and should be treated as regular
-          functions. See extended comment in JOIN::exec.
-        */
-        if (curr_join->join_tab->is_using_loose_index_scan())
-          curr_join->tmp_table_param.precomputed_group_by= TRUE;
-
-	if (!(curr_tmp_table=
-	      exec_tmp_table2= create_tmp_table(thd,
-						&curr_join->tmp_table_param,
-						*curr_all_fields,
-						(ORDER*) 0,
-						curr_join->select_distinct && 
-						!curr_join->group_list,
-						1, curr_join->select_options,
-						HA_POS_ERROR, "")))
-	  DBUG_VOID_RETURN;
-	curr_join->exec_tmp_table2= exec_tmp_table2;
-        explain->ops_tracker.report_tmp_table(exec_tmp_table2);
-      }
-      if (curr_join->group_list)
-      {
-	if (curr_join->join_tab == join_tab && save_join_tab())
-	{
-	  DBUG_VOID_RETURN;
-	}
-	DBUG_PRINT("info",("Sorting for index"));
-	THD_STAGE_INFO(thd, stage_creating_sort_index);
-	if (create_sort_index(thd, curr_join, curr_join->group_list,
-			      HA_POS_ERROR, HA_POS_ERROR, FALSE) ||
-	    make_group_fields(this, curr_join))
-	{
-	  DBUG_VOID_RETURN;
-	}
-        sortorder= curr_join->sortorder;
-      }
-      
-      THD_STAGE_INFO(thd, stage_copying_to_group_table);
-      DBUG_PRINT("info", ("%s", thd->proc_info));
-      if (curr_join != this)
-      {
-	if (sum_funcs2)
-	{
-	  curr_join->sum_funcs= sum_funcs2;
-	  curr_join->sum_funcs_end= sum_funcs_end2; 
-	}
-	else
-	{
-	  curr_join->alloc_func_list();
-	  sum_funcs2= curr_join->sum_funcs;
-	  sum_funcs_end2= curr_join->sum_funcs_end;
-	}
-      }
-      if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list,
-					1, TRUE) ||
-        prepare_sum_aggregators(curr_join->sum_funcs,
-          !curr_join->join_tab->is_using_agg_loose_index_scan()))
-        DBUG_VOID_RETURN;
-      curr_join->group_list= 0;
-      if (!curr_join->sort_and_group &&
-          curr_join->const_tables != curr_join->table_count)
-      {
-        JOIN_TAB *first_tab= curr_join->join_tab + curr_join->const_tables;
-        first_tab->sorted= MY_TEST(first_tab->loosescan_match_tab);
-      }
-      tmp_error= -1;
-      if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
-	  (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
-				0)))
-      {
-	error= tmp_error;
-	DBUG_VOID_RETURN;
-      }
-      end_read_record(&curr_join->join_tab->read_record);
-      curr_join->const_tables= curr_join->table_count; // Mark free for cleanup()
-      curr_join->join_tab[0].table= 0;           // Table is freed
-      
-      // No sum funcs anymore
-      if (!items2)
-      {
-	items2= items1 + all_fields.elements;
-	if (change_to_use_tmp_fields(thd, items2,
-				     tmp_fields_list2, tmp_all_fields2, 
-				     fields_list.elements, tmp_all_fields1))
-	  DBUG_VOID_RETURN;
-#ifdef HAVE_valgrind
-        /*
-          Some GCCs use memcpy() for struct assignment, even for x=x.
-          GCC bug 19410: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
-        */
-        if (curr_join != this)
-#endif
-        {
-          curr_join->tmp_fields_list2= tmp_fields_list2;
-          curr_join->tmp_all_fields2= tmp_all_fields2;
-        }
-      }
-      curr_fields_list= &curr_join->tmp_fields_list2;
-      curr_all_fields= &curr_join->tmp_all_fields2;
-      curr_join->set_items_ref_array(items2);
-      curr_join->tmp_table_param.field_count+= 
-	curr_join->tmp_table_param.sum_func_count;
-      curr_join->tmp_table_param.sum_func_count= 0;
-    }
-    if (curr_tmp_table->distinct)
-      curr_join->select_distinct=0;		/* Each row is unique */
-    
-    curr_join->join_free();			/* Free quick selects */
-
-    if (curr_join->select_distinct && ! curr_join->group_list)
-    {
-      THD_STAGE_INFO(thd, stage_removing_duplicates);
-      if (remove_duplicates(curr_join, curr_tmp_table,
-			    *curr_fields_list, curr_join->tmp_having))
-	DBUG_VOID_RETURN;
-      curr_join->tmp_having=0;
-      curr_join->select_distinct=0;
-    }
-    curr_tmp_table->reginfo.lock_type= TL_UNLOCK;
-    // psergey-todo: here is one place where we switch to
-    if (curr_join->make_simple_join(this, curr_tmp_table))
-      DBUG_VOID_RETURN;
-    calc_group_buffer(curr_join, curr_join->group_list);
-    count_field_types(select_lex, &curr_join->tmp_table_param, 
-                      *curr_all_fields, 0);
-    
-  }
-  if (procedure)
-    count_field_types(select_lex, &curr_join->tmp_table_param, 
-                      *curr_all_fields, 0);
-  
-  if (curr_join->group || curr_join->implicit_grouping ||
-      curr_join->tmp_table_param.sum_func_count ||
-      (procedure && (procedure->flags & PROC_GROUP)))
-  {
-    if (make_group_fields(this, curr_join))
-    {
-      DBUG_VOID_RETURN;
-    }
-    if (!items3)
-    {
-      if (!items0)
-	init_items_ref_array();
-      items3= ref_pointer_array + (all_fields.elements*4);
-      setup_copy_fields(thd, &curr_join->tmp_table_param,
-			items3, tmp_fields_list3, tmp_all_fields3,
-			curr_fields_list->elements, *curr_all_fields);
-      tmp_table_param.save_copy_funcs= curr_join->tmp_table_param.copy_funcs;
-      tmp_table_param.save_copy_field= curr_join->tmp_table_param.copy_field;
-      tmp_table_param.save_copy_field_end=
-	curr_join->tmp_table_param.copy_field_end;
-#ifdef HAVE_valgrind
-      if (curr_join != this)
-#endif
-      {
-        curr_join->tmp_all_fields3= tmp_all_fields3;
-        curr_join->tmp_fields_list3= tmp_fields_list3;
-      }
-    }
-    else
-    {
-      curr_join->tmp_table_param.copy_funcs= tmp_table_param.save_copy_funcs;
-      curr_join->tmp_table_param.copy_field= tmp_table_param.save_copy_field;
-      curr_join->tmp_table_param.copy_field_end=
-	tmp_table_param.save_copy_field_end;
-    }
-    curr_fields_list= &tmp_fields_list3;
-    curr_all_fields= &tmp_all_fields3;
-    curr_join->set_items_ref_array(items3);
-
-    if (curr_join->make_sum_func_list(*curr_all_fields, *curr_fields_list,
-				      1, TRUE) || 
-        prepare_sum_aggregators(curr_join->sum_funcs,
-                                !curr_join->join_tab ||
-                                !curr_join->join_tab->
-                                  is_using_agg_loose_index_scan()) ||
-        setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
-        thd->is_fatal_error)
-      DBUG_VOID_RETURN;
-  }
-  if (curr_join->group_list || curr_join->order)
-  {
-    DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
-    THD_STAGE_INFO(thd, stage_sorting_result);
-    /* If we have already done the group, add HAVING to sorted table */
-    if (curr_join->tmp_having && ! curr_join->group_list && 
-	! curr_join->sort_and_group)
-    {
-      JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables];
-      table_map used_tables= (curr_join->const_table_map |
-			      curr_table->table->map);
-      curr_join->tmp_having->update_used_tables();  
-
-      Item* sort_table_cond= make_cond_for_table(thd, curr_join->tmp_having,
-						 used_tables,
-						 (table_map)0, -1,
-						 FALSE, FALSE);
-      if (sort_table_cond)
-      {
-	if (!curr_table->select)
-	  if (!(curr_table->select= new SQL_SELECT))
-	    DBUG_VOID_RETURN;
-	if (!curr_table->select->cond)
-	  curr_table->select->cond= sort_table_cond;
-	else
-	{
-	  if (!(curr_table->select->cond=
-                new (thd->mem_root) Item_cond_and(thd, curr_table->select->cond,
-                                  sort_table_cond)))
-	    DBUG_VOID_RETURN;
-	}
-        if (curr_table->pre_idx_push_select_cond)
-	{
-          if (sort_table_cond->type() == Item::COND_ITEM)
-            sort_table_cond= sort_table_cond->copy_andor_structure(thd);           
-          if (!(curr_table->pre_idx_push_select_cond= 
-                new (thd->mem_root) Item_cond_and(thd, curr_table->pre_idx_push_select_cond,
-                                  sort_table_cond)))
-            DBUG_VOID_RETURN;            
-        }
-        if (curr_table->select->cond && !curr_table->select->cond->fixed)
-	  curr_table->select->cond->fix_fields(thd, 0);
-        if (curr_table->pre_idx_push_select_cond &&
-            !curr_table->pre_idx_push_select_cond->fixed)
-          curr_table->pre_idx_push_select_cond->fix_fields(thd, 0);
-
-        curr_table->select->pre_idx_push_select_cond=
-          curr_table->pre_idx_push_select_cond;
-        curr_table->set_select_cond(curr_table->select->cond, __LINE__);
-	curr_table->select_cond->top_level_item();
-	DBUG_EXECUTE("where",print_where(curr_table->select->cond,
-					 "select and having",
-                                         QT_ORDINARY););
-	curr_join->tmp_having= make_cond_for_table(thd, curr_join->tmp_having,
-						   ~ (table_map) 0,
-						   ~used_tables, -1,
-						   FALSE, FALSE);
-	DBUG_EXECUTE("where",print_where(curr_join->tmp_having,
-                                         "having after sort",
-                                         QT_ORDINARY););
-      }
-    }
-    {
-      if (group)
-	curr_join->select_limit= HA_POS_ERROR;
-      else
-      {
-	/*
-	  We can abort sorting after thd->select_limit rows if we there is no
-	  WHERE clause for any tables after the sorted one.
-	*/
-	JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1];
-	JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_join_tab_count];
-	for (; curr_table < end_table ; curr_table++)
-	{
-	  /*
-	    table->keyuse is set in the case there was an original WHERE clause
-	    on the table that was optimized away.
-	  */
-	  if (curr_table->select_cond ||
-	      (curr_table->keyuse && !curr_table->first_inner))
-	  {
-	    /* We have to sort all rows */
-	    curr_join->select_limit= HA_POS_ERROR;
-	    break;
-	  }
-	}
-      }
-      if (curr_join->join_tab == join_tab && save_join_tab())
-      {
-	DBUG_VOID_RETURN;
-      }
-      /*
-	Here we sort rows for ORDER BY/GROUP BY clause, if the optimiser
-	chose FILESORT to be faster than INDEX SCAN or there is no 
-	suitable index present.
-	Note, that create_sort_index calls test_if_skip_sort_order and may
-	finally replace sorting with index scan if there is a LIMIT clause in
-	the query. XXX: it's never shown in EXPLAIN!
-	OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
-      */
-      DBUG_PRINT("info",("Sorting for order by/group by"));
-      ORDER *order_arg=
-        curr_join->group_list ? curr_join->group_list : curr_join->order;
-      /*
-        filesort_limit:	 Return only this many rows from filesort().
-        We can use select_limit_cnt only if we have no group_by and 1 table.
-        This allows us to use Bounded_queue for queries like:
-          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
-        select_limit == HA_POS_ERROR (we need a full table scan)
-        unit->select_limit_cnt == 1 (we only need one row in the result set)
-       */
-      const ha_rows filesort_limit_arg=
-        (has_group_by || curr_join->table_count > 1)
-        ? curr_join->select_limit : unit->select_limit_cnt;
-      const ha_rows select_limit_arg=
-        select_options & OPTION_FOUND_ROWS
-        ? HA_POS_ERROR : unit->select_limit_cnt;
-      curr_join->filesort_found_rows= filesort_limit_arg != HA_POS_ERROR;
-
-      DBUG_PRINT("info", ("has_group_by %d "
-                          "curr_join->table_count %d "
-                          "curr_join->m_select_limit %d "
-                          "unit->select_limit_cnt %d",
-                          has_group_by,
-                          curr_join->table_count,
-                          (int) curr_join->select_limit,
-                          (int) unit->select_limit_cnt));
-      if (create_sort_index(thd,
-                            curr_join,
-                            order_arg,
-                            filesort_limit_arg,
-                            select_limit_arg,
-                            curr_join->group_list ? FALSE : TRUE))
-	DBUG_VOID_RETURN;
-      sortorder= curr_join->sortorder;
-      if (curr_join->const_tables != curr_join->table_count &&
-          !curr_join->join_tab[curr_join->const_tables].filesort)
-      {
-        /*
-          If no filesort for the first table then we are using an
-          INDEX SCAN. Thus we should not remove the sorted attribute
-          on the INDEX SCAN.
-        */
-        skip_sort_order= 1;
-      }
-    }
-  }
   /* XXX: When can we have here thd->is_error() not zero? */
   if (thd->is_error())
   {
     error= thd->is_error();
     DBUG_VOID_RETURN;
   }
-  curr_join->having= curr_join->tmp_having;
-  curr_join->fields= curr_fields_list;
-  curr_join->procedure= procedure;
 
   THD_STAGE_INFO(thd, stage_sending_data);
   DBUG_PRINT("info", ("%s", thd->proc_info));
-  result->send_result_set_metadata((procedure ? curr_join->procedure_fields_list :
-                                    *curr_fields_list),
-                                   Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
-  error= do_select(curr_join, curr_fields_list, NULL, procedure);
-  if (curr_join->order && curr_join->sortorder &&
-      curr_join->filesort_found_rows)
-  {
-    /* Use info provided by filesort. */
-    DBUG_ASSERT(curr_join->table_count > curr_join->const_tables);
-    JOIN_TAB *tab= curr_join->join_tab + curr_join->const_tables;
-    thd->limit_found_rows= tab->records;
-  }
-
+  result->send_result_set_metadata(
+                 procedure ? procedure_fields_list : *fields,
+                 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
+  error= do_select(this, procedure);
   /* Accumulate the counts from all join iterations of all join parts. */
-  thd->inc_examined_row_count(curr_join->join_examined_rows);
+  thd->inc_examined_row_count(join_examined_rows);
   DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
                         (ulong) thd->get_examined_row_count()));
 
-  /* 
-    With EXPLAIN EXTENDED we have to restore original ref_array
-    for a derived table which is always materialized.
-    We also need to do this when we have temp table(s).
-    Otherwise we would not be able to print the query correctly.
-  */ 
-  if (items0 && (thd->lex->describe & DESCRIBE_EXTENDED) &&
-      (select_lex->linkage == DERIVED_TABLE_TYPE ||
-       exec_tmp_table1 || exec_tmp_table2))
-    set_items_ref_array(items0);
-
   DBUG_VOID_RETURN;
 }
 
@@ -3263,41 +3345,32 @@ JOIN::destroy()
   DBUG_ENTER("JOIN::destroy");
   select_lex->join= 0;
 
-  if (tmp_join)
-  {
-    if (join_tab != tmp_join->join_tab)
-    {
-      JOIN_TAB *tab;
-      for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES);
-           tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
-      {
-	tab->cleanup();
-      }
-    }
-    tmp_join->tmp_join= 0;
-    /*
-      We need to clean up tmp_table_param for reusable JOINs (having non-zero
-      and different from self tmp_join) because it's not being cleaned up
-      anywhere else (as we need to keep the join is reusable).
-    */
-    tmp_table_param.cleanup();
-    tmp_join->tmp_table_param.copy_field= 0;
-    DBUG_RETURN(tmp_join->destroy());
-  }
   cond_equal= 0;
   having_equal= 0;
 
   cleanup(1);
+
+  uint tables= table_count+aggr_tables;
+
+  if (join_tab) // We should not have tables > 0 and join_tab == NULL
+   for (JOIN_TAB *tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES);
+        tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
+  {
+    if (tab->aggr)
+    {
+      free_tmp_table(thd, tab->table);
+      delete tab->tmp_table_param;
+      tab->tmp_table_param= NULL;
+      tab->aggr= NULL;
+    }
+
+    tab->table= NULL;
+  }
  /* Cleanup items referencing temporary table columns */
   cleanup_item_list(tmp_all_fields1);
   cleanup_item_list(tmp_all_fields3);
-  if (exec_tmp_table1)
-    free_tmp_table(thd, exec_tmp_table1);
-  if (exec_tmp_table2)
-    free_tmp_table(thd, exec_tmp_table2);
-  delete select;
   destroy_sj_tmp_tables(this);
-  delete_dynamic(&keyuse);
+  delete_dynamic(&keyuse); 
   delete procedure;
   DBUG_RETURN(error);
 }
@@ -3362,7 +3435,7 @@ void JOIN::cleanup_item_list(List<Item> &items) const
 */
 
 bool
-mysql_select(THD *thd, Item ***rref_pointer_array,
+mysql_select(THD *thd,
 	     TABLE_LIST *tables, uint wild_num, List<Item> &fields,
 	     COND *conds, uint og_num,  ORDER *order, ORDER *group,
 	     Item *having, ORDER *proc_param, ulonglong select_options,
@@ -3397,7 +3470,7 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
       }
       else
       {
-        if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+        if ((err= join->prepare( tables, wild_num,
                                 conds, og_num, order, false, group, having,
                                 proc_param, select_lex, unit)))
 	{
@@ -3421,7 +3494,7 @@ mysql_select(THD *thd, Item ***rref_pointer_array,
 	DBUG_RETURN(TRUE);
     THD_STAGE_INFO(thd, stage_init);
     thd->lex->used_tables=0;
-    if ((err= join->prepare(rref_pointer_array, tables, wild_num,
+    if ((err= join->prepare(tables, wild_num,
                             conds, og_num, order, false, group, having, proc_param,
                             select_lex, unit)))
     {
@@ -4224,7 +4297,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
   DEBUG_SYNC(join->thd, "inside_make_join_statistics");
 
   /* Generate an execution plan from the found optimal join order. */
-  DBUG_RETURN(join->thd->check_killed() || get_best_combination(join));
+  DBUG_RETURN(join->thd->check_killed() || join->get_best_combination());
 
 error:
   /*
@@ -4516,9 +4589,9 @@ add_key_field(JOIN *join,
       ((join->is_allowed_hash_join_access() &&
         field->hash_join_is_possible() && 
         !(field->table->pos_in_table_list->is_materialized_derived() &&
-          field->table->created)) ||
+          field->table->is_created())) ||
        (field->table->pos_in_table_list->is_materialized_derived() &&
-        !field->table->created && !(field->flags & BLOB_FLAG))))
+        !field->table->is_created() && !(field->flags & BLOB_FLAG))))
   {
     optimize= KEY_OPTIMIZE_EQ;
   }   
@@ -5725,7 +5798,8 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
       item->walk(&Item::collect_item_field_processor, 0,
                  (uchar*) &indexed_fields);
   }
-  else if (is_indexed_agg_distinct(join, &indexed_fields))
+  else if (join->tmp_table_param.sum_func_count &&
+           is_indexed_agg_distinct(join, &indexed_fields))
   {
     join->sort_and_group= 1;
   }
@@ -7312,13 +7386,13 @@ double JOIN::get_examined_rows()
 {
   double examined_rows;
   double prev_fanout= 1;
-  JOIN_TAB *tab= first_breadth_first_optimization_tab();
+  JOIN_TAB *tab= first_breadth_first_tab();
   JOIN_TAB *prev_tab= tab;
 
   examined_rows= tab->get_examined_rows();
 
-  while ((tab= next_breadth_first_tab(first_breadth_first_optimization_tab(),
-                                      top_table_access_tabs_count, tab)))
+  while ((tab= next_breadth_first_tab(first_breadth_first_tab(),
+                                      top_join_tab_count, tab)))
   {
     prev_fanout *= prev_tab->records_read;
     examined_rows+= tab->get_examined_rows() * prev_fanout;
@@ -8215,6 +8289,7 @@ prev_record_reads(POSITION *positions, uint idx, table_map found_ref)
 static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
                                         uint n_top_tabs_count, JOIN_TAB *tab)
 {
+  n_top_tabs_count += tab->join->aggr_tables;
   if (!tab->bush_root_tab)
   {
     /* We're at top level. Get the next top-level tab */
@@ -8266,7 +8341,7 @@ static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
 JOIN_TAB *first_explain_order_tab(JOIN* join)
 {
   JOIN_TAB* tab;
-  tab= join->table_access_tabs;
+  tab= join->join_tab;
   return (tab->bush_children) ? tab->bush_children->start : tab;
 }
 
@@ -8280,7 +8355,7 @@ JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab)
   /* Move to next tab in the array we're traversing */
   tab++;
   
-  if (tab == join->table_access_tabs + join->top_join_tab_count)
+  if (tab == join->join_tab + join->top_join_tab_count)
     return NULL; /* Outside SJM nest and reached EOF */
 
   if (tab->bush_children)
@@ -8306,7 +8381,7 @@ JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls
 
 JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
 {
-  tab= next_breadth_first_tab(join->first_breadth_first_execution_tab(),
+  tab= next_breadth_first_tab(join->first_breadth_first_tab(),
                               join->top_join_tab_count, tab);
   if (tab && tab->bush_root_tab)
     tab= NULL;
@@ -8384,7 +8459,7 @@ JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab,
   }
 
   /* If no more JOIN_TAB's on the top level */
-  if (++tab == join->join_tab + join->top_join_tab_count)
+  if (++tab == join->join_tab + join->top_join_tab_count + join->aggr_tables)
     return NULL;
 
   if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children)
@@ -8478,37 +8553,58 @@ static Item * const null_ptr= NULL;
     TRUE   Out of memory
 */
 
-bool
-get_best_combination(JOIN *join)
+bool JOIN::get_best_combination()
 {
   uint tablenr;
   table_map used_tables;
-  JOIN_TAB *join_tab,*j;
+  JOIN_TAB *j;
   KEYUSE *keyuse;
-  uint table_count;
-  THD *thd=join->thd;
   DBUG_ENTER("get_best_combination");
 
-  table_count=join->table_count;
-  if (!(join->join_tab=join_tab=
-	(JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count)))
+   /*
+    Additional plan nodes for postjoin tmp tables:
+      1? + // For GROUP BY
+      1? + // For DISTINCT
+      1? + // For aggregation functions aggregated in outer query
+           // when used with distinct
+      1? + // For ORDER BY
+      1?   // buffer result
+    Up to 2 tmp tables are actually used (the count is capped below), but
+    it's hard to tell the exact number at this stage.
+  */ 
+  uint aggr_tables= (group_list ? 1 : 0) +
+                    (select_distinct ?
+                     (tmp_table_param. using_outer_summary_function ? 2 : 1) : 0) +
+                    (order ? 1 : 0) +
+       (select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ;
+  
+  if (aggr_tables == 0)
+    aggr_tables= 1; /* For group by pushdown */
+
+  if (select_lex->window_specs.elements)
+    aggr_tables++;
+
+  if (aggr_tables > 2)
+    aggr_tables= 2;
+  if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*
+                                        (top_join_tab_count + aggr_tables))))
     DBUG_RETURN(TRUE);
 
-  join->full_join=0;
-  join->hash_join= FALSE;
+  full_join=0;
+  hash_join= FALSE;
 
   used_tables= OUTER_REF_TABLE_BIT;		// Outer row is already read
 
-  fix_semijoin_strategies_for_picked_join_order(join);
-  
+  fix_semijoin_strategies_for_picked_join_order(this);
+   
   JOIN_TAB_RANGE *root_range;
   if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE))
     DBUG_RETURN(TRUE);
-  root_range->start= join->join_tab;
+   root_range->start= join_tab;
   /* root_range->end will be set later */
-  join->join_tab_ranges.empty();
+  join_tab_ranges.empty();
 
-  if (join->join_tab_ranges.push_back(root_range, thd->mem_root))
+  if (join_tab_ranges.push_back(root_range, thd->mem_root))
     DBUG_RETURN(TRUE);
 
   JOIN_TAB *sjm_nest_end= NULL;
@@ -8517,7 +8613,7 @@ get_best_combination(JOIN *join)
   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
   {
     TABLE *form;
-    POSITION *cur_pos= &join->best_positions[tablenr];
+    POSITION *cur_pos= &best_positions[tablenr];
     if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || 
         cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
     {
@@ -8528,7 +8624,7 @@ get_best_combination(JOIN *join)
            in the temptable.
       */
       bzero(j, sizeof(JOIN_TAB));
-      j->join= join;
+      j->join= this;
       j->table= NULL; //temporary way to tell SJM tables from others.
       j->ref.key = -1;
       j->on_expr_ref= (Item**) &null_ptr;
@@ -8544,12 +8640,12 @@ get_best_combination(JOIN *join)
       j->cond_selectivity= 1.0;
       JOIN_TAB *jt;
       JOIN_TAB_RANGE *jt_range;
-      if (!(jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) ||
+      if (!(jt= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) ||
           !(jt_range= new JOIN_TAB_RANGE))
         DBUG_RETURN(TRUE);
       jt_range->start= jt;
       jt_range->end= jt + sjm->tables;
-      join->join_tab_ranges.push_back(jt_range, join->thd->mem_root);
+      join_tab_ranges.push_back(jt_range, thd->mem_root);
       j->bush_children= jt_range;
       sjm_nest_end= jt + sjm->tables;
       sjm_nest_root= j;
@@ -8557,11 +8653,11 @@ get_best_combination(JOIN *join)
       j= jt;
     }
     
-    *j= *join->best_positions[tablenr].table;
+    *j= *best_positions[tablenr].table;
 
     j->bush_root_tab= sjm_nest_root;
 
-    form=join->table[tablenr]=j->table;
+    form= table[tablenr]= j->table;
     used_tables|= form->map;
     form->reginfo.join_tab=j;
     if (!*j->on_expr_ref)
@@ -8577,36 +8673,36 @@ get_best_combination(JOIN *join)
 
     if (j->type == JT_SYSTEM)
       goto loop_end;
-    if ( !(keyuse= join->best_positions[tablenr].key))
+    if ( !(keyuse= best_positions[tablenr].key))
     {
       j->type=JT_ALL;
-      if (join->best_positions[tablenr].use_join_buffer &&
-          tablenr != join->const_tables)
-	join->full_join= 1;
+      if (best_positions[tablenr].use_join_buffer &&
+          tablenr != const_tables)
+	full_join= 1;
     }
 
-    /*if (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)
+    /*if (best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)
     {
       DBUG_ASSERT(!keyuse || keyuse->key ==
-                             join->best_positions[tablenr].loosescan_picker.loosescan_key);
-      j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
+                             best_positions[tablenr].loosescan_picker.loosescan_key);
+      j->index= best_positions[tablenr].loosescan_picker.loosescan_key;
     }*/
     
-    if (keyuse && create_ref_for_key(join, j, keyuse, TRUE, used_tables))
+    if (keyuse && create_ref_for_key(this, j, keyuse, TRUE, used_tables))
       DBUG_RETURN(TRUE);                        // Something went wrong
 
     if ((j->type == JT_REF || j->type == JT_EQ_REF) &&
         is_hash_join_key_no(j->ref.key))
-      join->hash_join= TRUE; 
+      hash_join= TRUE; 
 
   loop_end:
     /* 
       Save records_read in JOIN_TAB so that select_describe()/etc don't have
       to access join->best_positions[]. 
     */
-    j->records_read= join->best_positions[tablenr].records_read;
-    j->cond_selectivity= join->best_positions[tablenr].cond_selectivity;
-    join->map2table[j->table->tablenr]= j;
+    j->records_read= best_positions[tablenr].records_read;
+    j->cond_selectivity= best_positions[tablenr].cond_selectivity;
+    map2table[j->table->tablenr]= j;
 
     /* If we've reached the end of sjm nest, switch back to main sequence */
     if (j + 1 == sjm_nest_end)
@@ -8619,16 +8715,10 @@ get_best_combination(JOIN *join)
   }
   root_range->end= j;
 
-  join->top_join_tab_count= join->join_tab_ranges.head()->end - 
-                            join->join_tab_ranges.head()->start;
-  /*
-    Save pointers to select join tabs for SHOW EXPLAIN
-  */
-  join->table_access_tabs= join->join_tab;
-  join->top_table_access_tabs_count= join->top_join_tab_count;
-
+  top_join_tab_count= join_tab_ranges.head()->end - 
+                      join_tab_ranges.head()->start;
 
-  update_depend_map(join);
+  update_depend_map(this);
   DBUG_RETURN(0);
 }
 
@@ -9027,115 +9117,6 @@ get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
 			    keyuse->val, FALSE);
 }
 
-/**
-  @details Initialize a JOIN as a query execution plan
-  that accesses a single table via a table scan.
-
-  @param  parent      contains JOIN_TAB and TABLE object buffers for this join
-  @param  tmp_table   temporary table
-
-  @retval FALSE       success
-  @retval TRUE        error occurred
-*/
-bool
-JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
-{
-  DBUG_ENTER("JOIN::make_simple_join");
-
-  /*
-    Reuse TABLE * and JOIN_TAB if already allocated by a previous call
-    to this function through JOIN::exec (may happen for sub-queries).
-
-    psergey-todo: here, save the pointer for original join_tabs.
-  */
-  if (!(join_tab= parent->join_tab_reexec))
-  {
-    if (!(join_tab= parent->join_tab_reexec=
-          (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
-      DBUG_RETURN(TRUE);                        /* purecov: inspected */
-  }
-  else
-  {
-    /* Free memory used by previous allocations */
-    delete join_tab->filesort;
-  }
-
-  table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table;
-  table_count= top_join_tab_count= 1;
-
-  const_tables= 0;
-  const_table_map= 0;
-  eliminated_tables= 0;
-  tmp_table_param.field_count= tmp_table_param.sum_func_count=
-    tmp_table_param.func_count= 0;
-  /*
-    We need to destruct the copy_field (allocated in create_tmp_table())
-    before setting it to 0 if the join is not "reusable".
-  */
-  if (!tmp_join || tmp_join != this) 
-    tmp_table_param.cleanup(); 
-  tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
-  first_record= sort_and_group=0;
-  send_records= (ha_rows) 0;
-
-  if (group_optimized_away && !tmp_table_param.precomputed_group_by)
-  {
-    /*
-      If grouping has been optimized away, a temporary table is
-      normally not needed unless we're explicitly requested to create
-      one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).
-
-      In this case (grouping was optimized away), temp_table was
-      created without a grouping expression and JOIN::exec() will not
-      perform the necessary grouping (by the use of end_send_group()
-      or end_write_group()) if JOIN::group is set to false.
-
-      There is one exception: if the loose index scan access method is
-      used to read into the temporary table, grouping and aggregate
-      functions are handled.
-    */
-    // the temporary table was explicitly requested
-    DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT));
-    // the temporary table does not have a grouping expression
-    DBUG_ASSERT(!temp_table->group); 
-  }
-  else
-    group= false;
-
-  row_limit= unit->select_limit_cnt;
-  do_send_rows= row_limit ? 1 : 0;
-
-  bzero(join_tab, sizeof(JOIN_TAB));
-  join_tab->table=temp_table;
-  join_tab->set_select_cond(NULL, __LINE__);
-  join_tab->type= JT_ALL;			/* Map through all records */
-  join_tab->keys.init();
-  join_tab->keys.set_all();                     /* test everything in quick */
-  join_tab->ref.key = -1;
-  join_tab->shortcut_for_distinct= false;
-  join_tab->read_first_record= join_init_read_record;
-  join_tab->join= this;
-  join_tab->ref.key_parts= 0;
-  
-  uint select_nr= select_lex->select_number;
-  if (select_nr == INT_MAX) 
-  {
-    /* this is a fake_select_lex of a union */
-    select_nr= select_lex->master_unit()->first_select()->select_number;
-    join_tab->tracker= thd->lex->explain->get_union(select_nr)->
-                       get_tmptable_read_tracker();
-  }
-  else
-  {
-    join_tab->tracker= thd->lex->explain->get_select(select_nr)->
-                       get_using_temporary_read_tracker();
-  }
-  bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record));
-  temp_table->status=0;
-  temp_table->null_row=0;
-  DBUG_RETURN(FALSE);
-}
-
 
 inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
 {
@@ -9551,6 +9532,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
          tab= next_depth_first_tab(join, tab), i++)
     {
       bool is_hj;
+
       /*
         first_inner is the X in queries like:
         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
@@ -11181,7 +11163,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
                                 join_read_system :join_read_const;
       if (table->covering_keys.is_set(tab->ref.key) &&
           !table->no_keyread)
-        table->enable_keyread();
+        table->set_keyread(true);
       else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
         push_index_cond(tab, tab->ref.key);
       break;
@@ -11190,7 +11172,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
       /* fall through */
       if (table->covering_keys.is_set(tab->ref.key) &&
 	  !table->no_keyread)
-        table->enable_keyread();
+        table->set_keyread(true);
       else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
         push_index_cond(tab, tab->ref.key);
       break;
@@ -11205,7 +11187,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
       tab->quick=0;
       if (table->covering_keys.is_set(tab->ref.key) &&
 	  !table->no_keyread)
-        table->enable_keyread();
+        table->set_keyread(true);
       else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
         push_index_cond(tab, tab->ref.key);
       break;
@@ -11268,7 +11250,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
 	  if (tab->select && tab->select->quick &&
               tab->select->quick->index != MAX_KEY && //not index_merge
 	      table->covering_keys.is_set(tab->select->quick->index))
-            table->enable_keyread();
+            table->set_keyread(true);
 	  else if (!table->covering_keys.is_clear_all() &&
 		   !(tab->select && tab->select->quick))
 	  {					// Only read index tree
@@ -11438,9 +11420,21 @@ void JOIN_TAB::cleanup()
     cache= 0;
   }
   limit= 0;
+  // Free select that was created for filesort outside of create_sort_index
+  if (filesort && filesort->select && !filesort->own_select)
+    delete filesort->select;
+  delete filesort;
+  filesort= NULL;
+  /* Skip non-existing derived tables/views result tables */
+  if (table &&
+      (table->s->tmp_table != INTERNAL_TMP_TABLE || table->is_created()))
+  {
+    table->set_keyread(FALSE);
+    table->file->ha_index_or_rnd_end();
+  }
   if (table)
   {
-    table->disable_keyread();
+    table->set_keyread(false);
     table->file->ha_index_or_rnd_end();
     preread_init_done= FALSE;
     if (table->pos_in_table_list && 
@@ -11490,7 +11484,7 @@ void JOIN_TAB::cleanup()
 double JOIN_TAB::scan_time()
 {
   double res;
-  if (table->created)
+  if (table->is_created())
   {
     if (table->is_filled_at_execution())
     {
@@ -11529,9 +11523,10 @@ double JOIN_TAB::scan_time()
 ha_rows JOIN_TAB::get_examined_rows()
 {
   double examined_rows;
+  SQL_SELECT *sel= filesort? filesort->select : this->select;
 
-  if (select && select->quick && use_quick != 2)
-    examined_rows= select->quick->records;
+  if (sel && sel->quick && use_quick != 2)
+    examined_rows= sel->quick->records;
   else if (type == JT_NEXT || type == JT_ALL ||
            type == JT_HASH || type ==JT_HASH_NEXT)
   {
@@ -11818,35 +11813,12 @@ void JOIN::cleanup(bool full)
     table_count= original_table_count;
   }
 
-  if (table)
+  if (join_tab)
   {
     JOIN_TAB *tab;
-    /*
-      Only a sorted table may be cached.  This sorted table is always the
-      first non const table in join->table
-    */
-    if (table_count > const_tables) // Test for not-const tables
-    {
-      JOIN_TAB *first_tab= first_top_level_tab(this, WITHOUT_CONST_TABLES);
-      if (first_tab->table)
-      {
-        delete first_tab->filesort;
-        first_tab->filesort= 0;
-      }
-    }
+
     if (full)
     {
-      JOIN_TAB *sort_tab= first_linear_tab(this, WITH_BUSH_ROOTS,
-                                           WITHOUT_CONST_TABLES);
-      if (pre_sort_join_tab)
-      {
-        if (sort_tab && sort_tab->select == pre_sort_join_tab->select)
-        {
-          pre_sort_join_tab->select= NULL;
-        }
-        else
-          clean_pre_sort_join_tab();
-      }
       /*
         Call cleanup() on join tabs used by the join optimization
         (join->join_tab may now be pointing to result of make_simple_join
@@ -11858,49 +11830,64 @@ void JOIN::cleanup(bool full)
       */
       if (table_count)
       {
-        for (tab= first_breadth_first_optimization_tab(); tab;
-             tab= next_breadth_first_tab(first_breadth_first_optimization_tab(),
-                                         top_table_access_tabs_count, tab))
-          tab->cleanup();
-
-        /* We've walked optimization tabs, do execution ones too. */
-        if (first_breadth_first_execution_tab() !=
-            first_breadth_first_optimization_tab())
+        for (tab= first_breadth_first_tab(); tab;
+             tab= next_breadth_first_tab(first_breadth_first_tab(),
+                                         top_join_tab_count, tab))
         {
-          for (tab= first_breadth_first_execution_tab(); tab;
-               tab= next_breadth_first_tab(first_breadth_first_execution_tab(),
-                                           top_join_tab_count, tab))
-            tab->cleanup();
+          tab->cleanup();
+          delete tab->filesort_result;
+          tab->filesort_result= NULL;
         }
       }
       cleaned= true;
+      //psergey2: added (Q: why not in the above loop?)
+      {
+        JOIN_TAB *curr_tab= join_tab + top_join_tab_count;
+        for (uint i= 0; i < aggr_tables; i++, curr_tab++)
+        {
+          if (curr_tab->aggr)
+          {
+            free_tmp_table(thd, curr_tab->table);
+            delete curr_tab->tmp_table_param;
+            curr_tab->tmp_table_param= NULL;
+            curr_tab->aggr= NULL;
+
+            delete curr_tab->filesort_result;
+            curr_tab->filesort_result= NULL;
+          }
+        }
+        aggr_tables= 0; // psergey3
+      }
     }
     else
     {
       for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
            tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
       {
-	if (tab->table)
+        if (!tab->table)
+          continue;
+        DBUG_PRINT("info", ("close index: %s.%s  alias: %s",
+                            tab->table->s->db.str,
+                            tab->table->s->table_name.str,
+                            tab->table->alias.c_ptr()));
+	if (tab->table->is_created())
         {
-          DBUG_PRINT("info", ("close index: %s.%s  alias: %s",
-                              tab->table->s->db.str,
-                              tab->table->s->table_name.str,
-                              tab->table->alias.c_ptr()));
           tab->table->file->ha_index_or_rnd_end();
+          if (tab->aggr)
+          {
+            int tmp= 0;
+            if ((tmp= tab->table->file->extra(HA_EXTRA_NO_CACHE)))
+              tab->table->file->print_error(tmp, MYF(0));
+          }
         }
+        delete tab->filesort_result;
+        tab->filesort_result= NULL;
       }
     }
   }
   if (full)
   {
     cleanup_empty_jtbm_semi_joins(this, join_list);
-    /* 
-      Ensure that the following delete_elements() would not be called
-      twice for the same list.
-    */
-    if (tmp_join && tmp_join != this &&
-        tmp_join->group_fields == this->group_fields)
-      tmp_join->group_fields.empty();
 
     // Run Cached_item DTORs!
     group_fields.delete_elements();
@@ -11916,14 +11903,6 @@ void JOIN::cleanup(bool full)
       pointer to  tmp_table_param.copy_field from tmp_join, because it qill
       be removed in tmp_table_param.cleanup().
     */
-    if (tmp_join &&
-        tmp_join != this &&
-        tmp_join->tmp_table_param.copy_field ==
-        tmp_table_param.copy_field)
-    {
-      tmp_join->tmp_table_param.copy_field=
-        tmp_join->tmp_table_param.save_copy_field= 0;
-    }
     tmp_table_param.cleanup();
 
     delete pushdown_query;
@@ -11945,6 +11924,12 @@ void JOIN::cleanup(bool full)
       }
     }
   }
+  /* Restore ref array to original state */
+  if (current_ref_ptrs != items0)
+  {
+    set_items_ref_array(items0);
+    set_group_rpa= false;
+  }
   DBUG_VOID_RETURN;
 }
 
@@ -12120,8 +12105,8 @@ static ORDER *
 remove_const(JOIN *join,ORDER *first_order, COND *cond,
              bool change_list, bool *simple_order)
 {
-  *simple_order= 1;
-  if (join->table_count == join->const_tables)
+  *simple_order= join->rollup.state == ROLLUP::STATE_NONE;
+  if (join->only_const_tables())
     return change_list ? 0 : first_order;		// No need to sort
 
   ORDER *order,**prev_ptr, *tmp_order;
@@ -15973,6 +15958,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
   case Item::NULL_ITEM:
   case Item::VARBIN_ITEM:
   case Item::CACHE_ITEM:
+  case Item::WINDOW_FUNC_ITEM: // psergey-winfunc:
   case Item::EXPR_CACHE_ITEM:
     if (make_copy_field)
     {
@@ -16249,7 +16235,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
 
   reclength= string_total_length= 0;
   blob_count= string_count= null_count= hidden_null_count= group_null_items= 0;
-  param->using_indirect_summary_function=0;
+  param->using_outer_summary_function= 0;
 
   List_iterator_fast<Item> li(fields);
   Item *item;
@@ -16271,7 +16257,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
 	    function. We need to know this if someone is going to use
 	    DISTINCT on the result.
 	  */
-	  param->using_indirect_summary_function=1;
+	  param->using_outer_summary_function=1;
 	  continue;
         }
       }
@@ -16887,13 +16873,8 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
 
   if (!do_not_open)
   {
-    if (share->db_type() == TMP_ENGINE_HTON)
-    {
-      if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo,
-                                    &param->recinfo, select_options))
-        goto err;
-    }
-    if (open_tmp_table(table))
+    if (instantiate_tmp_table(table, param->keyinfo, param->start_recinfo,
+                              &param->recinfo, select_options))
       goto err;
   }
 
@@ -17034,9 +17015,9 @@ bool open_tmp_table(TABLE *table)
   }
   table->db_stat= HA_OPEN_KEYFILE+HA_OPEN_RNDFILE;
   (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
-  if (!table->created)
+  if (!table->is_created())
   {
-    table->created= TRUE;
+    table->set_created();
     table->in_use->inc_status_created_tmp_tables();
   }
 
@@ -17242,7 +17223,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
   table->in_use->inc_status_created_tmp_tables();
   table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
   share->db_record_offset= 1;
-  table->created= TRUE;
+  table->set_created();
   DBUG_RETURN(0);
  err:
   DBUG_RETURN(1);
@@ -17560,7 +17541,7 @@ free_tmp_table(THD *thd, TABLE *entry)
   save_proc_info=thd->proc_info;
   THD_STAGE_INFO(thd, stage_removing_tmp_table);
 
-  if (entry->file && entry->created)
+  if (entry->file && entry->is_created())
   {
     entry->file->ha_index_or_rnd_end();
     if (entry->db_stat)
@@ -17588,81 +17569,101 @@ free_tmp_table(THD *thd, TABLE *entry)
 
 
 /**
-  @details
-  Rows produced by a join sweep may end up in a temporary table or be sent
-  to a client. Setup the function of the nested loop join algorithm which
-  handles final fully constructed and matched records.
+  @brief
+  Set write_func of AGGR_OP object
 
-  @param join   join to setup the function for.
+  @param tab JOIN_TAB of the corresponding tmp table
 
-  @return
-    end_select function to use. This function can't fail.
+  @details
+  Function sets up write_func according to how the AGGR_OP object
+  attached to the given JOIN_TAB will be used in the query.
 */
 
-Next_select_func setup_end_select_func(JOIN *join)
+void set_postjoin_aggr_write_func(JOIN_TAB *tab)
 {
-  TABLE *table= join->tmp_table;
-  TMP_TABLE_PARAM *tmp_tbl= &join->tmp_table_param;
-  Next_select_func end_select;
+  JOIN *join= tab->join;
+  TABLE *table= tab->table;
+  AGGR_OP *aggr= tab->aggr;
+  TMP_TABLE_PARAM *tmp_tbl= tab->tmp_table_param;
 
-  /* Set up select_end */
-  if (table)
+  DBUG_ASSERT(table && aggr);
+
+  if (table->group && tmp_tbl->sum_func_count && 
+      !tmp_tbl->precomputed_group_by)
   {
-    if (table->group && tmp_tbl->sum_func_count && 
-        !tmp_tbl->precomputed_group_by)
-    {
-      if (table->s->keys)
-      {
-	DBUG_PRINT("info",("Using end_update"));
-	end_select=end_update;
-      }
-      else
-      {
-	DBUG_PRINT("info",("Using end_unique_update"));
-	end_select=end_unique_update;
-      }
-    }
-    else if (join->sort_and_group && !tmp_tbl->precomputed_group_by)
+    /*
+      Note for MyISAM tmp tables: if uniques is true keys won't be
+      created.
+    */
+    if (table->s->keys && !table->s->uniques)
     {
-      DBUG_PRINT("info",("Using end_write_group"));
-      end_select=end_write_group;
+      DBUG_PRINT("info",("Using end_update"));
+      aggr->set_write_func(end_update);
     }
     else
     {
-      DBUG_PRINT("info",("Using end_write"));
-      end_select=end_write;
-      if (tmp_tbl->precomputed_group_by)
-      {
-        /*
-          A preceding call to create_tmp_table in the case when loose
-          index scan is used guarantees that
-          TMP_TABLE_PARAM::items_to_copy has enough space for the group
-          by functions. It is OK here to use memcpy since we copy
-          Item_sum pointers into an array of Item pointers.
-        */
-        memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count,
-               join->sum_funcs,
-               sizeof(Item*)*tmp_tbl->sum_func_count);
-        tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0;
-      }
+      DBUG_PRINT("info",("Using end_unique_update"));
+      aggr->set_write_func(end_unique_update);
     }
   }
+  else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
+           !join->sort_and_group_aggr_tab)
+  {
+    DBUG_PRINT("info",("Using end_write_group"));
+    aggr->set_write_func(end_write_group);
+    join->sort_and_group_aggr_tab= tab;
+  }
   else
   {
-    /* 
-       Choose method for presenting result to user. Use end_send_group
-       if the query requires grouping (has a GROUP BY clause and/or one or
-       more aggregate functions). Use end_send if the query should not
-       be grouped.
-     */
-    if ((join->sort_and_group ||
-         (join->procedure && join->procedure->flags & PROC_GROUP)) &&
-        !tmp_tbl->precomputed_group_by)
-      end_select= end_send_group;
-    else
-      end_select= end_send;
+    DBUG_PRINT("info",("Using end_write"));
+    aggr->set_write_func(end_write);
+    if (tmp_tbl->precomputed_group_by)
+    {
+      /*
+        A preceding call to create_tmp_table in the case when loose
+        index scan is used guarantees that
+        TMP_TABLE_PARAM::items_to_copy has enough space for the group
+        by functions. It is OK here to use memcpy since we copy
+        Item_sum pointers into an array of Item pointers.
+      */
+      memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count,
+             join->sum_funcs,
+             sizeof(Item*)*tmp_tbl->sum_func_count);
+      tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0;
+    }
+  }
+}
+
+
+/**
+  @details
+  Rows produced by a join sweep may end up in a temporary table or be sent
+  to a client. Set the function of the nested loop join algorithm which
+  handles final fully constructed and matched records.
+
+  @param join   join to set up the function for; if tab is non-NULL, its tmp_table_param is consulted instead of join->tmp_table_param.
+
+  @return
+    end_select function to use. This function can't fail.
+*/
+
+Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab)
+{
+  TMP_TABLE_PARAM *tmp_tbl= tab ? tab->tmp_table_param : &join->tmp_table_param;
+
+  /* 
+     Choose method for presenting result to user. Use end_send_group
+     if the query requires grouping (has a GROUP BY clause and/or one or
+     more aggregate functions). Use end_send if the query should not
+     be grouped.
+   */
+  if (join->sort_and_group && !tmp_tbl->precomputed_group_by)
+  {
+    DBUG_PRINT("info",("Using end_send_group"));
+    return end_send_group;
   }
-  return end_select;
+  DBUG_PRINT("info",("Using end_send"));
+  return end_send;
 }
 
 
@@ -17678,19 +17679,13 @@ Next_select_func setup_end_select_func(JOIN *join)
 */
 
 static int
-do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
+do_select(JOIN *join, Procedure *procedure)
 {
   int rc= 0;
   enum_nested_loop_state error= NESTED_LOOP_OK;
-  JOIN_TAB *UNINIT_VAR(join_tab);
   DBUG_ENTER("do_select");
-  
-  join->procedure=procedure;
-  join->tmp_table= table;			/* Save for easy recursion */
-  join->fields= fields;
-  join->do_select_call_count++;
 
-  if (join->pushdown_query && join->do_select_call_count == 1)
+  if (join->pushdown_query)
   {
     /* Select fields are in the temporary table */
     join->fields= &join->tmp_fields_list1;
@@ -17698,34 +17693,33 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     join->set_items_ref_array(join->items1);
     /* The storage engine will take care of the group by query result */
     int res= join->pushdown_query->execute(join);
-    DBUG_RETURN(res);
-  }
 
-  if (table)
-  {
-    (void) table->file->extra(HA_EXTRA_WRITE_CACHE);
-    empty_record(table);
-    if (table->group && join->tmp_table_param.sum_func_count &&
-        table->s->keys && !table->file->inited)
+    if (res)
+      DBUG_RETURN(res);
+
+    if (join->pushdown_query->store_data_in_temp_table)
     {
-      rc= table->file->ha_index_init(0, 0);
-      if (rc)
-      {
-        table->file->print_error(rc, MYF(0));
-        DBUG_RETURN(-1);
-      }
+      JOIN_TAB *last_tab= join->join_tab + join->table_count;
+      last_tab->next_select= end_send;
+
+      enum_nested_loop_state state= last_tab->aggr->end_send();
+      if (state >= NESTED_LOOP_OK)
+        state= sub_select(join, last_tab, true);
+
+      if (state < NESTED_LOOP_OK)
+        res= 1;
+
+      if (join->result->send_eof())
+        res= 1;
     }
+    DBUG_RETURN(res);
   }
-  /* Set up select_end */
-  Next_select_func end_select= setup_end_select_func(join);
-  if (join->table_count)
-  {
-    join->join_tab[join->top_join_tab_count - 1].next_select= end_select;
-    join_tab=join->join_tab+join->const_tables;
-  }
+  
+  join->procedure= procedure;
   join->send_records=0;
-  if (join->table_count == join->const_tables)
+  if (join->only_const_tables() && !join->need_tmp)
   {
+    Next_select_func end_select= setup_end_select_func(join, NULL);
     /*
       HAVING will be checked after processing aggregate functions,
       But WHERE should checked here (we alredy have read tables).
@@ -17737,8 +17731,9 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     DBUG_ASSERT(join->outer_ref_cond == NULL);
     if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int())
     {
+      // HAVING will be checked by end_select
       error= (*end_select)(join, 0, 0);
-      if (error == NESTED_LOOP_OK || error == NESTED_LOOP_QUERY_LIMIT)
+      if (error >= NESTED_LOOP_OK)
 	error= (*end_select)(join, 0, 1);
 
       /*
@@ -17754,7 +17749,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
       if (!join->having || join->having->val_int())
       {
         List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
-                                   fields);
+                                   join->fields);
         rc= join->result->send_data(*columns_list) > 0;
       }
     }
@@ -17768,8 +17763,6 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
   }
   else
   {
-    DBUG_ASSERT(join->table_count);
-
     DBUG_EXECUTE_IF("show_explain_probe_do_select", 
                     if (dbug_user_var_equals_int(join->thd, 
                                                  "show_explain_probe_select_id", 
@@ -17777,15 +17770,13 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
                           dbug_serve_apcs(join->thd, 1);
                    );
 
+    JOIN_TAB *join_tab= join->join_tab + join->const_tables;
     if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
       error= NESTED_LOOP_NO_MORE_ROWS;
     else
-      error= sub_select(join,join_tab,0);
-    if ((error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS) &&
-        join->thd->killed != ABORT_QUERY)
-      error= sub_select(join,join_tab,1);
-    if (error == NESTED_LOOP_QUERY_LIMIT)
-      error= NESTED_LOOP_OK;                    /* select_limit used */
+      error= join->first_select(join,join_tab,0);
+    if (error >= NESTED_LOOP_OK && join->thd->killed != ABORT_QUERY)
+      error= join->first_select(join,join_tab,1);
   }
 
   join->thd->limit_found_rows= join->send_records;
@@ -17793,23 +17784,37 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
   if (error == NESTED_LOOP_NO_MORE_ROWS || join->thd->killed == ABORT_QUERY)
     error= NESTED_LOOP_OK;
 
-  if (table)
+  /*
+    For "order by with limit", we cannot rely on send_records, but need
+    to use the rowcount read originally into the join_tab applying the
+    filesort. There cannot be any post-filtering conditions, nor any
+    following join_tabs in this case, so this rowcount properly represents
+    the correct number of qualifying rows.
+  */
+  if (join->order)
   {
-    int tmp, new_errno= 0;
-    if ((tmp=table->file->extra(HA_EXTRA_NO_CACHE)))
+    // Save # of found records prior to cleanup
+    JOIN_TAB *sort_tab;
+    JOIN_TAB *join_tab= join->join_tab;
+    uint const_tables= join->const_tables;
+
+    // Take record count from first non constant table or from last tmp table
+    if (join->aggr_tables > 0)
+      sort_tab= join_tab + join->top_join_tab_count + join->aggr_tables - 1;
+    else
     {
-      DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed"));
-      new_errno= tmp;
+      DBUG_ASSERT(!join->only_const_tables());
+      sort_tab= join_tab + const_tables;
     }
-    if ((tmp=table->file->ha_index_or_rnd_end()))
+    if (sort_tab->filesort &&
+        join->select_options & OPTION_FOUND_ROWS &&
+        sort_tab->filesort->sortorder &&
+        sort_tab->filesort->limit != HA_POS_ERROR)
     {
-      DBUG_PRINT("error",("ha_index_or_rnd_end() failed"));
-      new_errno= tmp;
+      join->thd->limit_found_rows= sort_tab->records;
     }
-    if (new_errno)
-      table->file->print_error(new_errno,MYF(0));
   }
-  else
+
   {
     /*
       The following will unlock all cursors if the command wasn't an
@@ -17823,11 +17828,8 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
       Sic: this branch works even if rc != 0, e.g. when
       send_data above returns an error.
     */
-    if (!table)					// If sending data to client
-    {
-      if (join->result->send_eof())
-	rc= 1;                                  // Don't send error
-    }
+    if (join->result->send_eof())
+      rc= 1;                                  // Don't send error
     DBUG_PRINT("info",("%ld records output", (long) join->send_records));
   }
   else
@@ -17838,7 +17840,8 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
     DBUG_PRINT("error",("Error: do_select() failed"));
   }
 #endif
-  DBUG_RETURN(join->thd->is_error() ? -1 : rc);
+  rc= join->thd->is_error() ? -1 : rc;
+  DBUG_RETURN(rc);
 }
 
 
@@ -17855,6 +17858,106 @@ int rr_sequential_and_unpack(READ_RECORD *info)
 }
 
 
+/**
+  @brief
+  Instantiates temporary table
+
+  @param  table           Table object that describes the table to be
+                          instantiated
+  @param  keyinfo         Description of the index (there is always one index)
+  @param  start_recinfo   Column descriptions
+  @param  recinfo INOUT   End of column descriptions
+  @param  options         Option bits
+
+  @details
+    Creates tmp table and opens it.
+
+  @return
+     FALSE - OK
+     TRUE  - Error
+*/
+
+static
+bool instantiate_tmp_table(TABLE *table, KEY *keyinfo, 
+                           MARIA_COLUMNDEF *start_recinfo,
+                           MARIA_COLUMNDEF **recinfo, 
+                           ulonglong options)
+{
+  if (table->s->db_type() == TMP_ENGINE_HTON)
+  {
+    if (create_internal_tmp_table(table, keyinfo, start_recinfo, recinfo,
+                                  options))
+      return TRUE;
+    // Make empty record so random data is not written to disk
+    empty_record(table);
+  }
+  if (open_tmp_table(table))
+    return TRUE;
+
+  return FALSE;
+}
+
+
+/**
+  @brief 
+  Accumulate rows of the result of an aggregation operation in a tmp table
+
+  @param join  pointer to the structure providing all context info for the query
+  @param join_tab the JOIN_TAB object to which the operation is attached
+  @param end_of_records  TRUE <=> all records were accumulated, send them further
+
+  @details
+  This function accumulates records of the aggregation operation for
+  the node join_tab from the execution plan in a tmp table. To add a new
+  record the function calls join_tab->aggr->put_record().
+  When there are no more records to save, in this
+  case the end_of_records argument == true, the function first calls
+  aggr->end_send() on the operation.
+  If the state returned by that call is >= NESTED_LOOP_OK, this function
+  passes the 'end_of_records' signal further by calling sub_select() with
+  the end_of_records argument set to true, and returns the resulting
+  state.
+
+  @note
+  This function is not expected to be called when dynamic range scan is
+  used to scan join_tab because  range scans aren't used for tmp tables.
+
+  @return
+    return one of enum_nested_loop_state.
+*/
+
+enum_nested_loop_state
+sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
+{
+  enum_nested_loop_state rc;
+  AGGR_OP *aggr= join_tab->aggr;
+
+  /* This function cannot be called if join_tab has no associated aggregation */
+  DBUG_ASSERT(aggr != NULL);
+
+  DBUG_ENTER("sub_select_aggr_tab");
+
+  if (join->thd->killed)
+  {
+    /* The user has aborted the execution of the query */
+    join->thd->send_kill_message();
+    DBUG_RETURN(NESTED_LOOP_KILLED);
+  }
+
+  if (end_of_records)
+  {
+    rc= aggr->end_send();
+    if (rc >= NESTED_LOOP_OK)
+      rc= sub_select(join, join_tab, end_of_records);
+    DBUG_RETURN(rc);
+  }
+
+  rc= aggr->put_record();
+
+  DBUG_RETURN(rc);
+}
+
+
 /*
   Fill the join buffer with partial records, retrieve all full matches for
   them
@@ -17908,7 +18011,8 @@ sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
   if (end_of_records)
   {
     rc= cache->join_records(FALSE);
-    if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
+    if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
+        rc == NESTED_LOOP_QUERY_LIMIT)
       rc= sub_select(join, join_tab, end_of_records);
     DBUG_RETURN(rc);
   }
@@ -17935,7 +18039,8 @@ sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
            without it. If it's not the case remove it.
   */ 
   rc= cache->join_records(TRUE);
-  if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS)
+  if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
+      rc == NESTED_LOOP_QUERY_LIMIT)
     rc= sub_select(join, join_tab, end_of_records);
   DBUG_RETURN(rc);
 }
@@ -18022,7 +18127,7 @@ sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
     is the same as the value of the predicate, otherwise it's just returns
     true. 
     To carry out a return to a nested loop level of join table t the pointer 
-    to t is remembered in the field 'return_tab' of the join structure.
+    to t is remembered in the field 'return_tab' of the join structure.
     Consider the following query:
     @code
         SELECT * FROM t1,
@@ -18085,7 +18190,8 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
   int error;
   enum_nested_loop_state rc= NESTED_LOOP_OK;
   READ_RECORD *info= &join_tab->read_record;
-   
+
+
   for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table;
        flush_dups_table;
        flush_dups_table= flush_dups_table->next_flush_table)
@@ -18176,7 +18282,6 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
   DBUG_RETURN(rc);
 }
 
-
 /**
   @brief Process one row of the nested loop join.
 
@@ -18238,6 +18343,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
       condition is true => a match is found.
     */
     join_tab->tracker->r_rows_after_where++;
+
     bool found= 1;
     while (join_tab->first_unmatched && found)
     {
@@ -18579,11 +18685,11 @@ join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
 	!table->no_keyread &&
         (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
     {
-      table->enable_keyread();
+      table->set_keyread(true);
       tab->index= tab->ref.key;
     }
     error=join_read_const(tab);
-    table->disable_keyread();
+    table->set_keyread(false);
     if (error)
     {
       tab->info= ET_UNIQUE_ROW_NOT_FOUND;
@@ -19030,12 +19136,23 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
 
 int join_init_read_record(JOIN_TAB *tab)
 {
-  if (tab->select && tab->select->quick && tab->select->quick->reset())
+  int error;
+
+  if (tab->distinct && tab->remove_duplicates())  // Remove duplicates.
+    return 1;
+  if (tab->filesort && tab->sort_table())     // Sort table.
     return 1;
+
+  if (tab->select && tab->select->quick && (error= tab->select->quick->reset()))
+  {
+    /* Ensures error status is propagated back to client */
+    report_error(tab->table, error);
+    return 1;
+  }
   if (!tab->preread_init_done && tab->preread_init())
     return 1;
   if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
-                       tab->select, tab->filesort, 1,1, FALSE))
+                       tab->select, tab->filesort_result, 1,1, FALSE))
     return 1;
   return (*tab->read_record.read_record)(&tab->read_record);
 }
@@ -19053,7 +19170,7 @@ join_read_record_no_init(JOIN_TAB *tab)
   save_copy_end= tab->read_record.copy_field_end;
   
   init_read_record(&tab->read_record, tab->join->thd, tab->table,
-		   tab->select, tab->filesort, 1, 1, FALSE);
+		   tab->select, tab->filesort_result, 1, 1, FALSE);
 
   tab->read_record.copy_field=     save_copy;
   tab->read_record.copy_field_end= save_copy_end;
@@ -19062,6 +19179,25 @@ join_read_record_no_init(JOIN_TAB *tab)
   return (*tab->read_record.read_record)(&tab->read_record);
 }
 
+
+/*
+  Helper function for sorting table with filesort.
+*/
+
+bool
+JOIN_TAB::sort_table()
+{
+  int rc;
+  DBUG_PRINT("info",("Sorting for index"));
+  THD_STAGE_INFO(join->thd, stage_creating_sort_index);
+  DBUG_ASSERT(join->ordered_index_usage != (filesort->order == join->order ?
+                                            JOIN::ordered_index_order_by :
+                                            JOIN::ordered_index_group_by));
+  rc= create_sort_index(join->thd, join, this, NULL);
+  return (rc != 0);
+}
+
+
 static int
 join_read_first(JOIN_TAB *tab)
 {
@@ -19071,7 +19207,7 @@ join_read_first(JOIN_TAB *tab)
 
   if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
       !table->key_read)
-    table->enable_keyread();
+    table->set_keyread(true);
   tab->table->status=0;
   tab->read_record.read_record=join_read_next;
   tab->read_record.table=table;
@@ -19111,7 +19247,7 @@ join_read_last(JOIN_TAB *tab)
 
   if (table->covering_keys.is_set(tab->index) && !table->no_keyread &&
       !table->key_read)
-    table->enable_keyread();
+    table->set_keyread(true);
   tab->table->status=0;
   tab->read_record.read_record=join_read_prev;
   tab->read_record.table=table;
@@ -19235,16 +19371,21 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	 bool end_of_records)
 {
   DBUG_ENTER("end_send");
+  /*
+    When all tables are const this function is called with join_tab == NULL.
+    This function shouldn't be called for the first join_tab as it needs
+    to get fields from previous tab.
+  */
+  DBUG_ASSERT(join_tab == NULL || join_tab != join->join_tab);
+  //TODO pass fields via argument
+  List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;
+
   if (!end_of_records)
   {
+#if 0    
+#endif
     if (join->table_count &&
-        (join->join_tab->is_using_loose_index_scan() ||
-         /*
-           When order by used a loose scan as its input, the quick select may
-           be attached to pre_sort_join_tab.
-         */
-         (join->pre_sort_join_tab &&
-          join->pre_sort_join_tab->is_using_loose_index_scan())))
+        join->join_tab->is_using_loose_index_scan())
     {
       /* Copy non-aggregated fields when loose index scan is used. */
       copy_fields(&join->tmp_table_param);
@@ -19261,7 +19402,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     {
       int error;
       /* result < 0 if row was not accepted and should not be counted */
-      if ((error= join->result->send_data(*join->fields)))
+      if ((error= join->result->send_data(*fields)))
         DBUG_RETURN(error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR);
     }
 
@@ -19270,13 +19411,15 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
         !join->do_send_rows)
     {
       /*
-        If filesort is used for sorting, stop after select_limit_cnt+1
-        records are read. Because of optimization in some cases it can
-        provide only select_limit_cnt+1 records.
+        If we have used Priority Queue for optimizing order by with limit,
+        then stop here, there are no more records to consume.
+        When this optimization is used, end_send is called on the next
+        join_tab.
       */
-      if (join->order && join->sortorder &&
-          join->filesort_found_rows &&
-          join->select_options & OPTION_FOUND_ROWS)
+      if (join->order &&
+          join->select_options & OPTION_FOUND_ROWS &&
+          join_tab > join->join_tab &&
+          (join_tab - 1)->filesort && (join_tab - 1)->filesort->using_pq)
       {
         DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT"));
         DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
@@ -19288,7 +19431,7 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       if (join->select_options & OPTION_FOUND_ROWS)
       {
 	JOIN_TAB *jt=join->join_tab;
-	if ((join->table_count == 1) && !join->tmp_table && !join->sort_and_group
+	if ((join->table_count == 1) && !join->sort_and_group
 	    && !join->send_group_parts && !join->having && !jt->select_cond &&
 	    !(jt->select && jt->select->quick) &&
 	    (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
@@ -19297,10 +19440,9 @@ end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  /* Join over all rows in table;  Return number of found rows */
 	  TABLE *table=jt->table;
 
-	  join->select_options ^= OPTION_FOUND_ROWS;
-	  if (jt->filesort)                     // If filesort was used
+	  if (jt->filesort_result)                     // If filesort was used
 	  {
-	    join->send_records= jt->filesort->found_rows;
+	    join->send_records= jt->filesort_result->found_rows;
 	  }
 	  else
 	  {
@@ -19351,13 +19493,21 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 {
   int idx= -1;
   enum_nested_loop_state ok_code= NESTED_LOOP_OK;
+  List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;
   DBUG_ENTER("end_send_group");
 
+  if (!join->items3.is_null() && !join->set_group_rpa)
+  {
+    join->set_group_rpa= true;
+    join->set_items_ref_array(join->items3);
+  }
+
   if (!join->first_record || end_of_records ||
       (idx=test_if_group_changed(join->group_fields)) >= 0)
   {
-    if (join->first_record || 
-        (end_of_records && !join->group && !join->group_optimized_away))
+    if (!join->group_sent &&
+        (join->first_record ||
+         (end_of_records && !join->group && !join->group_optimized_away)))
     {
       if (join->procedure)
 	join->procedure->end_group();
@@ -19371,7 +19521,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
  	  else
 	  {
 	    if (join->do_send_rows)
-	      error=join->procedure->send_row(*join->fields) ? 1 : 0;
+	      error=join->procedure->send_row(*fields) ? 1 : 0;
 	    join->send_records++;
 	  }
 	  if (end_of_records && join->procedure->end_of_records())
@@ -19383,11 +19533,8 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  {
             List_iterator_fast<Item> it(*join->fields);
             Item *item;
-            DBUG_PRINT("info", ("no matching rows"));
-
-	    /* No matching rows for group function */
-	    join->clear();
-            join->no_rows_in_result_called= 1;
+            /* No matching rows for group function */
+            join->clear();
 
             while ((item= it++))
               item->no_rows_in_result();
@@ -19398,7 +19545,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  {
 	    if (join->do_send_rows)
             {
-	      error= join->result->send_data(*join->fields);
+	      error=join->result->send_data(*fields);
               if (error < 0)
               {
                 /* Duplicate row, don't count */
@@ -19407,6 +19554,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
               }
             }
 	    join->send_records++;
+            join->group_sent= true;
 	  }
 	  if (join->rollup.state != ROLLUP::STATE_NONE && error <= 0)
 	  {
@@ -19458,6 +19606,7 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	DBUG_RETURN(NESTED_LOOP_ERROR);
       if (join->procedure)
 	join->procedure->add();
+      join->group_sent= false;
       DBUG_RETURN(ok_code);
     }
   }
@@ -19474,16 +19623,16 @@ static enum_nested_loop_state
 end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  bool end_of_records)
 {
-  TABLE *table=join->tmp_table;
+  TABLE *const table= join_tab->table;
   DBUG_ENTER("end_write");
 
   if (!end_of_records)
   {
-    copy_fields(&join->tmp_table_param);
-    if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
+    copy_fields(join_tab->tmp_table_param);
+    if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
       DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
 
-    if (!join->having || join->having->val_int())
+    if (!join_tab->having || join_tab->having->val_int())
     {
       int error;
       join->found_records++;
@@ -19493,15 +19642,16 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	  goto end;
         bool is_duplicate;
 	if (create_internal_tmp_table_from_heap(join->thd, table, 
-                                                join->tmp_table_param.start_recinfo,
-                                                &join->tmp_table_param.recinfo,
+                                                join_tab->tmp_table_param->start_recinfo,
+                                                &join_tab->tmp_table_param->recinfo,
                                                 error, 1, &is_duplicate))
 	  DBUG_RETURN(NESTED_LOOP_ERROR);        // Not a table_is_full error
         if (is_duplicate)
           goto end;
 	table->s->uniques=0;			// To ensure rows are the same
       }
-      if (++join->send_records >= join->tmp_table_param.end_write_records &&
+      if (++join_tab->send_records >=
+            join_tab->tmp_table_param->end_write_records &&
 	  join->do_send_rows)
       {
 	if (!(join->select_options & OPTION_FOUND_ROWS))
@@ -19536,7 +19686,7 @@ static enum_nested_loop_state
 end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	   bool end_of_records)
 {
-  TABLE *table=join->tmp_table;
+  TABLE *const table= join_tab->table;
   ORDER   *group;
   int	  error;
   DBUG_ENTER("end_update");
@@ -19545,7 +19695,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     DBUG_RETURN(NESTED_LOOP_OK);
 
   join->found_records++;
-  copy_fields(&join->tmp_table_param);		// Groups are copied twice.
+  copy_fields(join_tab->tmp_table_param);	// Groups are copied twice.
   /* Make a key of group index */
   for (group=table->group ; group ; group=group->next)
   {
@@ -19565,7 +19715,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       group->buff[-1]= (char) group->field->is_null();
   }
   if (!table->file->ha_index_read_map(table->record[1],
-                                      join->tmp_table_param.group_buff,
+                                      join_tab->tmp_table_param->group_buff,
                                       HA_WHOLE_KEY,
                                       HA_READ_KEY_EXACT))
   {						/* Update old record */
@@ -19581,13 +19731,13 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
   }
 
   init_tmptable_sum_functions(join->sum_funcs);
-  if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
+  if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
   if ((error= table->file->ha_write_tmp_row(table->record[0])))
   {
     if (create_internal_tmp_table_from_heap(join->thd, table,
-                                            join->tmp_table_param.start_recinfo,
-                                            &join->tmp_table_param.recinfo,
+                                       join_tab->tmp_table_param->start_recinfo,
+                                            &join_tab->tmp_table_param->recinfo,
                                             error, 0, NULL))
       DBUG_RETURN(NESTED_LOOP_ERROR);            // Not a table_is_full error
     /* Change method to update rows */
@@ -19597,9 +19747,9 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       DBUG_RETURN(NESTED_LOOP_ERROR);
     }
 
-    join->join_tab[join->top_join_tab_count-1].next_select=end_unique_update;
+    join_tab->aggr->set_write_func(end_unique_update);
   }
-  join->send_records++;
+  join_tab->send_records++;
 end:
   if (join->thd->check_killed())
   {
@@ -19616,7 +19766,7 @@ static enum_nested_loop_state
 end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 		  bool end_of_records)
 {
-  TABLE *table=join->tmp_table;
+  TABLE *table= join_tab->table;
   int	  error;
   DBUG_ENTER("end_unique_update");
 
@@ -19624,12 +19774,12 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     DBUG_RETURN(NESTED_LOOP_OK);
 
   init_tmptable_sum_functions(join->sum_funcs);
-  copy_fields(&join->tmp_table_param);		// Groups are copied twice.
-  if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
+  copy_fields(join_tab->tmp_table_param);		// Groups are copied twice.
+  if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
 
   if (!(error= table->file->ha_write_tmp_row(table->record[0])))
-    join->send_records++;			// New group
+    join_tab->send_records++;			// New group
   else
   {
     if ((int) table->file->get_dup_key(error) < 0)
@@ -19675,7 +19825,7 @@ enum_nested_loop_state
 end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 		bool end_of_records)
 {
-  TABLE *table=join->tmp_table;
+  TABLE *table= join_tab->table;
   int	  idx= -1;
   DBUG_ENTER("end_write_group");
 
@@ -19689,21 +19839,21 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
       int send_group_parts= join->send_group_parts;
       if (idx < send_group_parts)
       {
-	if (!join->first_record)
-	{
-	  /* No matching rows for group function */
-	  join->clear();
-	}
+        if (!join->first_record)
+        {
+          /* No matching rows for group function */
+          join->clear();
+        }
         copy_sum_funcs(join->sum_funcs,
                        join->sum_funcs_end[send_group_parts]);
-	if (!join->having || join->having->val_int())
+	if (!join_tab->having || join_tab->having->val_int())
 	{
           int error= table->file->ha_write_tmp_row(table->record[0]);
           if (error && 
               create_internal_tmp_table_from_heap(join->thd, table,
-                                                  join->tmp_table_param.start_recinfo,
-                                                  &join->tmp_table_param.recinfo,
-                                                  error, 0, NULL))
+                                          join_tab->tmp_table_param->start_recinfo,
+                                          &join_tab->tmp_table_param->recinfo,
+                                                   error, 0, NULL))
 	    DBUG_RETURN(NESTED_LOOP_ERROR);
         }
         if (join->rollup.state != ROLLUP::STATE_NONE)
@@ -19724,8 +19874,8 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
     }
     if (idx < (int) join->send_group_parts)
     {
-      copy_fields(&join->tmp_table_param);
-      if (copy_funcs(join->tmp_table_param.items_to_copy, join->thd))
+      copy_fields(join_tab->tmp_table_param);
+      if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
 	DBUG_RETURN(NESTED_LOOP_ERROR);
       if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1]))
 	DBUG_RETURN(NESTED_LOOP_ERROR);
@@ -20283,9 +20433,11 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
     if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
       DBUG_RETURN(0);
 
+    const ORDER::enum_order keypart_order= 
+      (key_part->key_part_flag & HA_REVERSE_SORT) ? 
+      ORDER::ORDER_DESC : ORDER::ORDER_ASC;
     /* set flag to 1 if we can use read-next on key, else to -1 */
-    flag= ((order->asc == !(key_part->key_part_flag & HA_REVERSE_SORT)) ?
-           1 : -1);
+    flag= (order->direction == keypart_order) ? 1 : -1;
     if (reverse && flag != reverse)
       DBUG_RETURN(0);
     reverse=flag;				// Remember if reverse
@@ -20858,11 +21010,11 @@ check_reverse_order:
          and best_key doesn't, then revert the decision.
       */
       if (!table->covering_keys.is_set(best_key))
-        table->disable_keyread();
+        table->set_keyread(false);
       else
       {
         if (!table->key_read)
-          table->enable_keyread();
+          table->set_keyread(true);
       }
 
       if (!quick_created)
@@ -20893,7 +21045,7 @@ check_reverse_order:
           tab->ref.key_parts= 0;
           if (select_limit < table->stat_records())
             tab->limit= select_limit;
-          table->disable_keyread();
+          table->set_keyread(false);
         }
       }
       else if (tab->type != JT_ALL || tab->select->quick)
@@ -21017,14 +21169,9 @@ use_filesort:
    create_sort_index()
      thd		Thread handler
      join		Join with table to sort
-     order		How table should be sorted
-     filesort_limit	Max number of rows that needs to be sorted
-     select_limit	Max number of rows in final output
-		        Used to decide if we should use index or not
-     is_order_by        true if we are sorting on ORDER BY, false if GROUP BY
-                        Used to decide if we should use index or not     
-
-
+     join_tab		What table to sort
+     fsort              Filesort object.  NULL means "use tab->filesort".
+ 
   IMPLEMENTATION
    - If there is an index that can be used, the first non-const join_tab in
      'join' is modified to use this index.
@@ -21038,113 +21185,34 @@ use_filesort:
     1		No records
 */
 
-static int
-create_sort_index(THD *thd, JOIN *join, ORDER *order,
-		  ha_rows filesort_limit, ha_rows select_limit,
-                  bool is_order_by)
+int
+create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort)
 {
   uint length;
   TABLE *table;
   SQL_SELECT *select;
-  JOIN_TAB *tab;
   bool quick_created= FALSE;
   SORT_INFO *file_sort= 0;
   DBUG_ENTER("create_sort_index");
 
-  if (join->table_count == join->const_tables)
-    DBUG_RETURN(0);				// One row, no need to sort
-  tab=    join->join_tab + join->const_tables;
-  table=  tab->table;
-  select= tab->select;
-  
-  JOIN_TAB *save_pre_sort_join_tab= NULL;
-  if (join->pre_sort_join_tab)
-  {
-    /*
-      we've already been in this function, and stashed away the
-      original access method in join->pre_sort_join_tab, restore it
-      now.
-    */
-    
-    /* First, restore state of the handler */
-    if (join->pre_sort_index != MAX_KEY)
-    {
-      if (table->file->ha_index_or_rnd_end())
-        goto err;
-      if (join->pre_sort_idx_pushed_cond)
-      {
-        table->file->idx_cond_push(join->pre_sort_index,
-                                 join->pre_sort_idx_pushed_cond);
-      }
-    }
-    else
-    {
-      if (table->file->ha_index_or_rnd_end() || 
-          table->file->ha_rnd_init(TRUE))
-        goto err;
-    }
-
-    /* Second, restore access method parameters */
-    tab->records=           join->pre_sort_join_tab->records;
-    tab->select=            join->pre_sort_join_tab->select;
-    tab->select_cond=       join->pre_sort_join_tab->select_cond;
-    tab->type=              join->pre_sort_join_tab->type;
-    tab->read_first_record= join->pre_sort_join_tab->read_first_record; 
-
-    save_pre_sort_join_tab= join->pre_sort_join_tab;
-    join->pre_sort_join_tab= NULL;
-  }
-  else
-  {
-    /* 
-      Save index #, save index condition. Do it right now, because MRR may 
-    */
-    if (table->file->inited == handler::INDEX)
-    {
-      join->pre_sort_index= table->file->active_index;
-      join->pre_sort_idx_pushed_cond= table->file->pushed_idx_cond;
-      // no need to save key_read
-    }
-    else
-      join->pre_sort_index= MAX_KEY;
-  }
+  if (fsort == NULL)
+    fsort= tab->filesort;
 
+  // Only-const joins give one row and need no sort; make_tmp_tables_info should already handle this.
+  DBUG_ASSERT(!join->only_const_tables() && fsort);
+  table=  tab->table;
+  select= fsort->select;
+ 
   /* Currently ORDER BY ... LIMIT is not supported in subqueries. */
   DBUG_ASSERT(join->group_list || !join->is_in_subquery());
 
   /*
-    When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
-    and thus force sorting on disk unless a group min-max optimization
-    is going to be used as it is applied now only for one table queries
-    with covering indexes.
-    The expections is if we are already using the index for GROUP BY
-    (in which case sort would be free) or ORDER and GROUP BY are different.
-  */
-  if ((order != join->group_list || 
-       !(join->select_options & SELECT_BIG_RESULT) ||
-       (select && select->quick &&
-        select->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) &&
-      test_if_skip_sort_order(tab,order,select_limit,0, 
-                              is_order_by ?  &table->keys_in_use_for_order_by :
-                              &table->keys_in_use_for_group_by))
-  {
-    tab->update_explain_data(join->const_tables);
-    DBUG_RETURN(0);
-  }
-  tab->update_explain_data(join->const_tables);
-
-  /*
     Calculate length of join->order as this may be longer than 'order',
     which may come from 'group by'. This is needed as join->sortorder is
     used both for grouping and ordering.
   */
   length= 0;
-  for (ORDER *ord= join->order; ord; ord= ord->next)
-    length++;
 
-    if (!(join->sortorder= 
-        make_unireg_sortorder(thd, order, &length, join->sortorder)))
-    goto err;				/* purecov: inspected */
 
   table->status=0;				// May be wrong if quick_select
 
@@ -21152,36 +21220,48 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
     goto err;
 
   // If table has a range, move it to select
-  if (select && !select->quick && tab->ref.key >= 0)
+  if (select && tab->ref.key >= 0)
   {
-    if (tab->quick)
+    if (!select->quick)
     {
-      select->quick=tab->quick;
-      tab->quick=0;
+      if (tab->quick)
+      {
+        select->quick= tab->quick;
+        tab->quick= NULL;
       /* 
         We can only use 'Only index' if quick key is same as ref_key
         and in index_merge 'Only index' cannot be used
       */
       if (((uint) tab->ref.key != select->quick->index))
-        table->disable_keyread();
+        table->set_keyread(FALSE);
+      }
+      else
+      {
+        /*
+	  We have a ref on a const;  Change this to a range that filesort
+	  can use.
+	  For impossible ranges (like when doing a lookup on NULL on a NOT NULL
+	  field), quick will contain an empty record set.
+        */
+        if (!(select->quick= (tab->type == JT_FT ?
+			      get_ft_select(thd, table, tab->ref.key) :
+			      get_quick_select_for_ref(thd, table, &tab->ref, 
+                                                       tab->found_records))))
+	  goto err;
+        quick_created= TRUE;
+      }
+      fsort->own_select= true;
     }
     else
     {
-      /*
-	We have a ref on a const;  Change this to a range that filesort
-	can use.
-	For impossible ranges (like when doing a lookup on NULL on a NOT NULL
-	field, quick will contain an empty record set.
-      */
-      if (!(select->quick= (tab->type == JT_FT ?
-			    get_ft_select(thd, table, tab->ref.key) :
-			    get_quick_select_for_ref(thd, table, &tab->ref, 
-                                                     tab->found_records))))
-	goto err;
-      quick_created= TRUE;
+      DBUG_ASSERT(tab->type == JT_REF || tab->type == JT_EQ_REF);
+      // Update ref value
+      if ((cp_buffer_from_ref(thd, table, &tab->ref) && thd->is_fatal_error))
+        goto err;                                   // out of memory
     }
   }
 
+ 
   /* Fill schema tables with data before filesort if it's necessary */
   if ((join->select_lex->options & OPTION_SCHEMA_TABLE) &&
       get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX))
@@ -21189,11 +21269,9 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
 
   if (table->s->tmp_table)
     table->file->info(HA_STATUS_VARIABLE);	// Get record count
-  file_sort= filesort(thd, table, join->sortorder, length,
-                      select, filesort_limit, 0,
-                      join->explain->ops_tracker.report_sorting(thd));
-  DBUG_ASSERT(tab->filesort == 0);
-  tab->filesort= file_sort;
+  file_sort= filesort(thd, table, fsort, tab->keep_current_rowid, fsort->tracker);
+  DBUG_ASSERT(tab->filesort_result == 0);
+  tab->filesort_result= file_sort;
   tab->records= 0;
   if (file_sort)
   {
@@ -21207,42 +21285,18 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
     /* This will delete the quick select. */
     select->cleanup();
   }
-
-  if (!join->pre_sort_join_tab)
-  {
-    if (save_pre_sort_join_tab)
-      join->pre_sort_join_tab= save_pre_sort_join_tab;
-    else if (!(join->pre_sort_join_tab= (JOIN_TAB*)thd->alloc(sizeof(JOIN_TAB))))
-      goto err;
-  }
-
-  *(join->pre_sort_join_tab)= *tab;
-  
-  tab->select=NULL;
-  tab->set_select_cond(NULL, __LINE__);
-  tab->type=JT_ALL;				// Read with normal read_record
-  tab->read_first_record= join_init_read_record;
-  tab->table->file->ha_index_or_rnd_end();
+ 
+  table->set_keyread(FALSE); // Restore if we used indexes
+  if (tab->type == JT_FT)
+    table->file->ft_end();
+  else
+    table->file->ha_index_or_rnd_end();
 
   DBUG_RETURN(file_sort == 0);
 err:
   DBUG_RETURN(-1);
 }
 
-void JOIN::clean_pre_sort_join_tab()
-{
-  //TABLE *table=  pre_sort_join_tab->table;
-  /*
-   Note: we can come here for fake_select_lex object. That object will have
-   the table already deleted by st_select_lex_unit::cleanup().  
-    We rely on that fake_select_lex didn't have quick select.
-  */
-  if (pre_sort_join_tab->select && pre_sort_join_tab->select->quick)
-  {
-    pre_sort_join_tab->select->cleanup();
-  }
-}
-
 
 /**
   Compare fields from table->record[0] and table->record[1],
@@ -21306,22 +21360,28 @@ static void free_blobs(Field **ptr)
     Rows that do not satisfy 'having' condition are also removed.
 */
 
-static int
-remove_duplicates(JOIN *join, TABLE *table, List<Item> &fields, Item *having)
+bool
+JOIN_TAB::remove_duplicates()
+
 {
-  int error;
+  bool error;
   ulong keylength= 0;
   uint field_count;
+  List<Item> *fields= (this-1)->fields;
   THD *thd= join->thd;
 
   DBUG_ENTER("remove_duplicates");
-  join->explain->ops_tracker.report_duplicate_removal();
+
+  DBUG_ASSERT(join->aggr_tables > 0 && table->s->tmp_table != NO_TMP_TABLE);
+  THD_STAGE_INFO(join->thd, stage_removing_duplicates);
+
+  //join->explain->ops_tracker.report_duplicate_removal();
 
   table->reginfo.lock_type=TL_WRITE;
 
   /* Calculate how many saved fields there is in list */
   field_count=0;
-  List_iterator<Item> it(fields);
+  List_iterator<Item> it(*fields);
   Item *item;
   while ((item=it++))
   {
@@ -21332,7 +21392,7 @@ remove_duplicates(JOIN *join, TABLE *table, List<Item> &fields, Item *having)
   if (!field_count && !(join->select_options & OPTION_FOUND_ROWS) && !having) 
   {                    // only const items with no OPTION_FOUND_ROWS
     join->unit->select_limit_cnt= 1;		// Only send first row
-    DBUG_RETURN(0);
+    DBUG_RETURN(false);
   }
 
   Field **first_field=table->field+table->s->fields - field_count;
@@ -21560,67 +21620,9 @@ err:
 }
 
 
-SORT_FIELD *make_unireg_sortorder(THD *thd, ORDER *order, uint *length,
-                                  SORT_FIELD *sortorder)
-{
-  uint count;
-  SORT_FIELD *sort,*pos;
-  DBUG_ENTER("make_unireg_sortorder");
-
-  count=0;
-  for (ORDER *tmp = order; tmp; tmp=tmp->next)
-    count++;
-  if (!sortorder)
-    sortorder= (SORT_FIELD*) thd->alloc(sizeof(SORT_FIELD) *
-                                        (MY_MAX(count, *length) + 1));
-  pos= sort= sortorder;
-
-  if (!pos)
-    DBUG_RETURN(0);
-
-  for (;order;order=order->next,pos++)
-  {
-    Item *const item= order->item[0], *const real_item= item->real_item();
-    pos->field= 0; pos->item= 0;
-    if (real_item->type() == Item::FIELD_ITEM)
-    {
-      // Could be a field, or Item_direct_view_ref wrapping a field
-      DBUG_ASSERT(item->type() == Item::FIELD_ITEM ||
-                  (item->type() == Item::REF_ITEM &&
-                   static_cast<Item_ref*>(item)->ref_type() ==
-                   Item_ref::VIEW_REF));
-      pos->field= static_cast<Item_field*>(real_item)->field;
-    }
-    else if (real_item->type() == Item::SUM_FUNC_ITEM &&
-             !real_item->const_item())
-    {
-      // Aggregate, or Item_aggregate_ref
-      DBUG_ASSERT(item->type() == Item::SUM_FUNC_ITEM ||
-                  (item->type() == Item::REF_ITEM &&
-                   static_cast<Item_ref*>(item)->ref_type() ==
-                   Item_ref::AGGREGATE_REF));
-      pos->field= item->get_tmp_table_field();
-    }
-    else if (real_item->type() == Item::COPY_STR_ITEM)
-    {                                           // Blob patch
-      pos->item= static_cast<Item_copy*>(real_item)->get_item();
-    }
-    else
-      pos->item= item;
-    pos->reverse=! order->asc;
-    DBUG_ASSERT(pos->field != NULL || pos->item != NULL);
-  }
-  *length=count;
-  DBUG_RETURN(sort);
-}
-
-
 /*
   eq_ref: Create the lookup key and check if it is the same as saved key
 
-
-
-
   SYNOPSIS
     cmp_buffer_with_ref()
       tab      Join tab of the accessed table
@@ -21725,7 +21727,7 @@ cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref)
 */
 
 static bool
-find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
+find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
                    ORDER *order, List<Item> &fields, List<Item> &all_fields,
                    bool is_group_field)
 {
@@ -21753,7 +21755,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
                order_item->full_name(), thd->where);
       return TRUE;
     }
-    thd->change_item_tree((Item**)&order->item, (Item*)(ref_pointer_array + count - 1));
+    thd->change_item_tree((Item **)&order->item, (Item *)&ref_pointer_array[count - 1]);
     order->in_field_list= 1;
     order->counter= count;
     order->counter_used= 1;
@@ -21813,7 +21815,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
         'shadowed' a table field with the same name, the table field will be
         chosen over the derived field.
       */
-      order->item= ref_pointer_array + counter;
+      order->item= &ref_pointer_array[counter];
       order->in_field_list=1;
       return FALSE;
     }
@@ -21852,8 +21854,6 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
     return TRUE; /* Wrong field. */
 
   uint el= all_fields.elements;
-  DBUG_ASSERT(all_fields.elements <=
-              thd->lex->current_select->ref_pointer_array_size);
  /* Add new field to field list. */
   all_fields.push_front(order_item, thd->mem_root);
   ref_pointer_array[el]= order_item;
@@ -21868,7 +21868,7 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
   if (order_item->type() == Item::SUM_FUNC_ITEM)
     ((Item_sum *)order_item)->ref_by= all_fields.head_ref();
 
-  order->item= ref_pointer_array + el;
+  order->item= &ref_pointer_array[el];
   return FALSE;
 }
 
@@ -21880,15 +21880,21 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
   the field list.
 */
 
-int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
+int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
 		List<Item> &fields, List<Item> &all_fields, ORDER *order)
-{
+{ 
+  enum_parsing_place parsing_place= thd->lex->current_select->parsing_place;
   thd->where="order clause";
   for (; order; order=order->next)
   {
     if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
 			   all_fields, FALSE))
       return 1;
+    if ((*order->item)->with_window_func && parsing_place != IN_ORDER_BY)
+    {
+      my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
+      return 1;
+    }
   }
   return 0;
 }
@@ -21921,10 +21927,11 @@ int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
 */
 
 int
-setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
+setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
 	    List<Item> &fields, List<Item> &all_fields, ORDER *order,
 	    bool *hidden_group_fields)
 {
+  enum_parsing_place parsing_place= thd->lex->current_select->parsing_place;
   *hidden_group_fields=0;
   ORDER *ord;
 
@@ -21934,22 +21941,26 @@ setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
   uint org_fields=all_fields.elements;
 
   thd->where="group statement";
-  enum_parsing_place save_place= thd->lex->current_select->parsing_place;
-  thd->lex->current_select->parsing_place= IN_GROUP_BY;
   for (ord= order; ord; ord= ord->next)
   {
     if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
 			   all_fields, TRUE))
       return 1;
     (*ord->item)->marker= UNDEF_POS;		/* Mark found */
-    if ((*ord->item)->with_sum_func)
+    if ((*ord->item)->with_sum_func && parsing_place == IN_GROUP_BY)
     {
       my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name());
       return 1;
     }
+    if ((*ord->item)->with_window_func)
+    {
+      if (parsing_place == IN_GROUP_BY)
+        my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
+      else
+        my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
+      return 1;
+    }
   }
-  thd->lex->current_select->parsing_place= save_place;
-
   if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY)
   {
     /*
@@ -22056,14 +22067,16 @@ setup_new_fields(THD *thd, List<Item> &fields,
 */
 
 ORDER *
-create_distinct_group(THD *thd, Item **ref_pointer_array,
+create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
                       ORDER *order_list, List<Item> &fields,
                       List<Item> &all_fields,
 		      bool *all_order_by_fields_used)
 {
   List_iterator<Item> li(fields);
-  Item *item, **orig_ref_pointer_array= ref_pointer_array;
+  Item *item;
+  Ref_ptr_array orig_ref_pointer_array= ref_pointer_array;
   ORDER *order,*group,**prev;
+  uint idx= 0;
 
   *all_order_by_fields_used= 1;
   while ((item=li++))
@@ -22110,16 +22123,14 @@ create_distinct_group(THD *thd, Item **ref_pointer_array,
           Because HEAP tables can't index BIT fields we need to use an
           additional hidden field for grouping because later it will be
           converted to a LONG field. Original field will remain of the
-          BIT type and will be returned to a client.
+          BIT type and will be returned to a client.
         */
         Item_field *new_item= new (thd->mem_root) Item_field(thd, (Item_field*)item);
         int el= all_fields.elements;
-        DBUG_ASSERT(all_fields.elements <=
-                    thd->lex->current_select->ref_pointer_array_size);
         orig_ref_pointer_array[el]= new_item;
         all_fields.push_front(new_item, thd->mem_root);
-        ord->item= orig_ref_pointer_array + el;
-      }
+        ord->item=&orig_ref_pointer_array[el]; 
+     }
       else
       {
         /*
@@ -22127,14 +22138,14 @@ create_distinct_group(THD *thd, Item **ref_pointer_array,
           simple indexing of ref_pointer_array (order in the array and in the
           list are same)
         */
-        ord->item= ref_pointer_array;
+        ord->item= &ref_pointer_array[idx];
       }
-      ord->asc=1;
+      ord->direction= ORDER::ORDER_ASC;
       *prev=ord;
       prev= &ord->next;
     }
 next_item:
-    ref_pointer_array++;
+    idx++;
   }
   *prev=0;
   return group;
@@ -22206,7 +22217,7 @@ test_if_subpart(ORDER *a,ORDER *b)
   for (; a && b; a=a->next,b=b->next)
   {
     if ((*a->item)->eq(*b->item,1))
-      a->asc=b->asc;
+      a->direction=b->direction;
     else
       return 0;
   }
@@ -22384,9 +22395,9 @@ make_group_fields(JOIN *main_join, JOIN *curr_join)
 
 
 /**
-  Get a list of buffers for saveing last group.
+  Get a list of buffers for saving last group.
 
-  Groups are saved in reverse order for easyer check loop.
+  Groups are saved in reverse order for easier check loop.
 */
 
 static bool
@@ -22437,8 +22448,13 @@ int test_if_item_cache_changed(List<Cached_item> &list)
 }
 
 
+/*
+  @return
+    -1         - Group not changed
+   value>=0    - Number of the component where the group changed
+*/
 
-static int
+int
 test_if_group_changed(List<Cached_item> &list)
 {
   DBUG_ENTER("test_if_group_changed");
@@ -22487,7 +22503,7 @@ test_if_group_changed(List<Cached_item> &list)
 
 bool
 setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
-		  Item **ref_pointer_array,
+		  Ref_ptr_array ref_pointer_array,
 		  List<Item> &res_selected_fields, List<Item> &res_all_fields,
 		  uint elements, List<Item> &all_fields)
 {
@@ -22716,7 +22732,8 @@ bool JOIN::alloc_func_list()
     1  error
 */
 
-bool JOIN::make_sum_func_list(List<Item> &field_list, List<Item> &send_result_set_metadata,
+bool JOIN::make_sum_func_list(List<Item> &field_list,
+                              List<Item> &send_result_set_metadata,
 			      bool before_group_by, bool recompute)
 {
   List_iterator_fast<Item> it(field_list);
@@ -22771,7 +22788,7 @@ bool JOIN::make_sum_func_list(List<Item> &field_list, List<Item> &send_result_se
 */
 
 static bool
-change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array,
+change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
 			 List<Item> &res_selected_fields,
 			 List<Item> &res_all_fields,
 			 uint elements, List<Item> &all_fields)
@@ -22808,14 +22825,6 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array,
         Item_field *new_field= new (thd->mem_root) Item_temptable_field(thd, field);
         if (!suv || !new_field)
           DBUG_RETURN(true);                  // Fatal error
-        /*
-          We are replacing the argument of Item_func_set_user_var after
-          its value has been read.  The argument's null_value should be
-          set by now, so we must set it explicitly for the replacement
-          argument since the null_value may be read without any
-          preceeding call to val_*().
-        */
-        new_field->update_null_value();
         List<Item> list;
         list.push_back(new_field, thd->mem_root);
         suv->set_arguments(thd, list);
@@ -22889,7 +22898,7 @@ change_to_use_tmp_fields(THD *thd, Item **ref_pointer_array,
 */
 
 static bool
-change_refs_to_tmp_fields(THD *thd, Item **ref_pointer_array,
+change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
 			  List<Item> &res_selected_fields,
 			  List<Item> &res_all_fields, uint elements,
 			  List<Item> &all_fields)
@@ -23242,17 +23251,23 @@ bool JOIN::rollup_init()
   */
   tmp_table_param.group_parts= send_group_parts;
 
-  if (!(rollup.null_items= (Item_null_result**) thd->alloc((sizeof(Item*) +
-                                                sizeof(Item**) +
-                                                sizeof(List<Item>) +
-				                ref_pointer_array_size)
-				                * send_group_parts )))
-    return 1;
-  
-  rollup.fields= (List<Item>*) (rollup.null_items + send_group_parts);
-  rollup.ref_pointer_arrays= (Item***) (rollup.fields + send_group_parts);
+  Item_null_result **null_items=
+    static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
+
+  rollup.null_items= Item_null_array(null_items, send_group_parts);
+  rollup.ref_pointer_arrays=
+    static_cast<Ref_ptr_array*>
+    (thd->alloc((sizeof(Ref_ptr_array) +
+                 all_fields.elements * sizeof(Item*)) * send_group_parts));
+  rollup.fields=
+    static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
+
+  if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
+    return true;
+
   ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
 
+
   /*
     Prepare space for field list for the different levels
     These will be filled up in rollup_make_fields()
@@ -23262,7 +23277,7 @@ bool JOIN::rollup_init()
     rollup.null_items[i]= new (thd->mem_root) Item_null_result(thd);
     List<Item> *rollup_fields= &rollup.fields[i];
     rollup_fields->empty();
-    rollup.ref_pointer_arrays[i]= ref_array;
+    rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements);
     ref_array+= all_fields.elements;
   }
   for (i= 0 ; i < send_group_parts; i++)
@@ -23409,11 +23424,12 @@ bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
     bool real_fields= 0;
     Item *item;
     List_iterator<Item> new_it(rollup.fields[pos]);
-    Item **ref_array_start= rollup.ref_pointer_arrays[pos];
+    Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
     ORDER *start_group;
 
     /* Point to first hidden field */
-    Item **ref_array= ref_array_start + fields_arg.elements-1;
+    uint ref_array_ix= fields_arg.elements-1;
+
 
     /* Remember where the sum functions ends for the previous level */
     sum_funcs_end[pos+1]= *func;
@@ -23430,7 +23446,7 @@ bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
       if (item == first_field)
       {
 	real_fields= 1;				// End of hidden fields
-	ref_array= ref_array_start;
+	ref_array_ix= 0;
       }
 
       if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
@@ -23474,15 +23490,15 @@ bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
 	  }
 	}
       }
-      *ref_array= item;
+      ref_array_start[ref_array_ix]= item;
       if (real_fields)
       {
 	(void) new_it++;			// Point to next item
 	new_it.replace(item);			// Replace previous
-	ref_array++;
+	ref_array_ix++;
       }
       else
-	ref_array--;
+	ref_array_ix--;
     }
   }
   sum_funcs_end[0]= *func;			// Point to last function
@@ -23515,9 +23531,7 @@ int JOIN::rollup_send_data(uint idx)
   {
     int res= 0;
     /* Get reference pointers to sum functions in place */
-    memcpy((char*) ref_pointer_array,
-	   (char*) rollup.ref_pointer_arrays[i],
-	   ref_pointer_array_size);
+    copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
     if ((!having || having->val_int()))
     {
       if (send_records < unit->select_limit_cnt && do_send_rows &&
@@ -23528,7 +23542,7 @@ int JOIN::rollup_send_data(uint idx)
     }
   }
   /* Restore ref_pointer_array */
-  set_items_ref_array(current_ref_pointer_array);
+  set_items_ref_array(current_ref_ptrs);
   return 0;
 }
 
@@ -23558,9 +23572,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
   for (i= send_group_parts ; i-- > idx ; )
   {
     /* Get reference pointers to sum functions in place */
-    memcpy((char*) ref_pointer_array,
-	   (char*) rollup.ref_pointer_arrays[i],
-	   ref_pointer_array_size);
+    copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
     if ((!having || having->val_int()))
     {
       int write_error;
@@ -23583,7 +23595,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
     }
   }
   /* Restore ref_pointer_array */
-  set_items_ref_array(current_ref_pointer_array);
+  set_items_ref_array(current_ref_ptrs);
   return 0;
 }
 
@@ -23706,33 +23718,9 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
 }
 
 
-/*
-  TODO: this function is only applicable for the first non-const optimization
-  join tab. 
-*/
-
-void JOIN_TAB::update_explain_data(uint idx)
-{
-  if (this == join->first_breadth_first_optimization_tab() + join->const_tables &&
-      join->select_lex->select_number != INT_MAX &&
-      join->select_lex->select_number != UINT_MAX)
-  {
-    Explain_table_access *eta= new (join->thd->mem_root)
-      Explain_table_access(join->thd->mem_root);
-    save_explain_data(eta, join->const_table_map, join->select_distinct,
-                      join->first_breadth_first_optimization_tab());
-
-    Explain_select *sel= join->thd->lex->explain->
-      get_select(join->select_lex->select_number);
-    idx -= my_count_bits(join->eliminated_tables);
-    sel->replace_table(idx, eta);
-  }
-}
-
-
 void JOIN_TAB::save_explain_data(Explain_table_access *eta,
                                  table_map prefix_tables, 
-                                 bool distinct, JOIN_TAB *first_top_tab)
+                                 bool distinct_arg, JOIN_TAB *first_top_tab)
 {
   int quick_type;
   CHARSET_INFO *cs= system_charset_info;
@@ -23748,6 +23736,22 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
   explain_plan= eta;
   eta->key.clear();
   eta->quick_info= NULL;
+  eta->using_filesort= false;
+
+  SQL_SELECT *tab_select;
+  /* 
+    We assume that if this table does pre-sorting, then it doesn't do filtering
+    with SQL_SELECT.
+  */
+  DBUG_ASSERT(!(select && filesort));
+  tab_select= (filesort)? filesort->select : select;
+
+  if (filesort)
+  {
+    eta->using_filesort= true; // This fixes EXPLAIN
+    eta->fs_tracker= filesort->tracker= 
+      new Filesort_tracker(thd->lex->analyze_stmt);
+  }
   
   tracker= &eta->tracker;
   jbuf_tracker= &eta->jbuf_tracker;
@@ -23825,9 +23829,9 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
   /* "type" column */
   enum join_type tab_type= type;
   if ((type == JT_ALL || type == JT_HASH) &&
-       select && select->quick && use_quick != 2)
+       tab_select && tab_select->quick && use_quick != 2)
   {
-    cur_quick= select->quick;
+    cur_quick= tab_select->quick;
     quick_type= cur_quick->get_type();
     if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
         (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) ||
@@ -23862,9 +23866,9 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
     In STRAIGHT_JOIN queries, there can be join tabs with JT_CONST type
     that still have quick selects.
   */
-  if (select && select->quick && tab_type != JT_CONST)
+  if (tab_select && tab_select->quick && tab_type != JT_CONST)
   {
-    eta->quick_info= select->quick->get_explain(thd->mem_root);
+    eta->quick_info= tab_select->quick->get_explain(thd->mem_root);
   }
 
   if (key_info) /* 'index' or 'ref' access */
@@ -23988,7 +23992,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
     uint keyno= MAX_KEY;
     if (ref.key_parts)
       keyno= ref.key;
-    else if (select && cur_quick)
+    else if (tab_select && cur_quick)
       keyno = cur_quick->index;
 
     if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
@@ -24010,7 +24014,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
     {
       eta->push_extra(ET_USING);
     }
-    if (select)
+    if (tab_select)
     {
       if (use_quick == 2)
       {
@@ -24020,7 +24024,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
           eta->range_checked_fer->
             append_possible_keys_stat(thd->mem_root, table, keys);
       }
-      else if (select->cond ||
+      else if (tab_select->cond ||
                (cache_select && cache_select->cond))
       {
         const COND *pushed_cond= table->file->pushed_cond;
@@ -24033,7 +24037,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
         }
         else
         {
-          eta->where_cond= select->cond;
+          eta->where_cond= tab_select->cond;
           eta->cache_cond= cache_select? cache_select->cond : NULL;
           eta->push_extra(ET_USING_WHERE);
         }
@@ -24065,7 +24069,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
       if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
       {
         QUICK_GROUP_MIN_MAX_SELECT *qgs= 
-          (QUICK_GROUP_MIN_MAX_SELECT *) select->quick;
+          (QUICK_GROUP_MIN_MAX_SELECT *) tab_select->quick;
         eta->push_extra(ET_USING_INDEX_FOR_GROUP_BY);
         eta->loose_scan_is_scanning= qgs->loose_scan_is_scanning();
       }
@@ -24077,14 +24081,15 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
 
     if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE)
     {
-      explain_append_mrr_info((QUICK_RANGE_SELECT*)(select->quick),
+      explain_append_mrr_info((QUICK_RANGE_SELECT*)(tab_select->quick),
                               &eta->mrr_type);
       if (eta->mrr_type.length() > 0)
         eta->push_extra(ET_USING_MRR);
     }
 
-    if (distinct & test_all_bits(prefix_tables, join->select_list_used_tables))
+    if (shortcut_for_distinct)
       eta->push_extra(ET_DISTINCT);
+
     if (loosescan_match_tab)
     {
       eta->push_extra(ET_LOOSESCAN);
@@ -24152,14 +24157,55 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta,
 
 
 /*
+  Walk through join->aggr_tables and save aggregation/grouping query plan into
+  an Explain_select object
+*/
+
+void save_agg_explain_data(JOIN *join, Explain_select *xpl_sel)
+{
+  JOIN_TAB *join_tab=join->join_tab + join->top_join_tab_count;
+  Explain_aggr_node *prev_node;
+  Explain_aggr_node *node= xpl_sel->aggr_tree;
+
+  for (uint i= 0; i < join->aggr_tables; i++, join_tab++)
+  {
+    // Each aggregate means a temporary table
+    prev_node= node;
+    node= new Explain_aggr_tmp_table;
+    node->child= prev_node;
+
+    if (join_tab->distinct)
+    {
+      prev_node= node;
+      node= new Explain_aggr_remove_dups;
+      node->child= prev_node;
+    }
+
+    if (join_tab->filesort)
+    {
+      Explain_aggr_filesort *eaf = new Explain_aggr_filesort;
+      eaf->tracker= new Filesort_tracker(join->thd->lex->analyze_stmt);
+      join_tab->filesort->tracker= eaf->tracker;
+
+      prev_node= node;
+      node= eaf;
+      node->child= prev_node;
+    }
+  }
+  xpl_sel->aggr_tree= node;
+}
+
+
+/*
   Save Query Plan Footprint
 
   @note
     Currently, this function may be called multiple times
 */
 
-int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
-                                   bool need_order, bool distinct, 
+int JOIN::save_explain_data_intern(Explain_query *output, 
+                                   bool need_tmp_table_arg,
+                                   bool need_order_arg, bool distinct_arg, 
                                    const char *message)
 {
   JOIN *join= this; /* Legacy: this code used to be a non-member function */
@@ -24188,7 +24234,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
     explain->select_id= join->select_lex->select_number;
     explain->select_type= join->select_lex->type;
     explain->using_temporary= need_tmp;
-    explain->using_filesort=  need_order;
+    explain->using_filesort=  need_order_arg;
     /* Setting explain->message means that all other members are invalid */
     explain->message= message;
 
@@ -24205,7 +24251,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
     explain->select_id=   select_lex->select_number;
     explain->select_type= select_lex->type;
     explain->using_temporary= need_tmp;
-    explain->using_filesort=  need_order;
+    explain->using_filesort=  need_order_arg;
     explain->message= "Storage engine handles GROUP BY";
 
     if (select_lex->master_unit()->derived)
@@ -24225,12 +24271,8 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
     xpl_sel->select_type= join->select_lex->type;
     if (select_lex->master_unit()->derived)
       xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
-
-    if (need_tmp_table)
-      xpl_sel->using_temporary= true;
-
-    if (need_order)
-      xpl_sel->using_filesort= true;
+    
+    save_agg_explain_data(this, xpl_sel);
 
     xpl_sel->exec_const_cond= exec_const_cond;
     xpl_sel->outer_ref_cond= outer_ref_cond;
@@ -24240,7 +24282,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
       xpl_sel->having= having;
     xpl_sel->having_value= having_value;
 
-    JOIN_TAB* const first_top_tab= join->first_breadth_first_optimization_tab();
+    JOIN_TAB* const first_top_tab= join->first_breadth_first_tab();
     JOIN_TAB* prev_bush_root_tab= NULL;
 
     Explain_basic_join *cur_parent= xpl_sel;
@@ -24259,13 +24301,6 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
       }
 
 
-      if (join->table_access_tabs == join->join_tab &&
-          tab == (first_top_tab + join->const_tables) && pre_sort_join_tab)
-      {
-        saved_join_tab= tab;
-        tab= pre_sort_join_tab;
-      }
-
       Explain_table_access *eta= (new (output->mem_root)
                                   Explain_table_access(output->mem_root));
 
@@ -24296,7 +24331,7 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
       prev_bush_root_tab= tab->bush_root_tab;
 
       cur_parent->add_table(eta, output);
-      tab->save_explain_data(eta, used_tables, distinct, first_top_tab);
+      tab->save_explain_data(eta, used_tables, distinct_arg, first_top_tab);
 
       if (saved_join_tab)
         tab= saved_join_tab;
@@ -24358,11 +24393,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
   /* Update the QPF with latest values of using_temporary, using_filesort */
   Explain_select *explain_sel;
   uint select_nr= join->select_lex->select_number;
-  if ((explain_sel= thd->lex->explain->get_select(select_nr)))
-  {
-    explain_sel->using_temporary= need_tmp_table;
-    explain_sel->using_filesort= need_order;
-  }
 
   for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit();
        unit;
@@ -24427,18 +24457,17 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
   {
     thd->lex->current_select= first;
     unit->set_limit(unit->global_parameters());
-    res= mysql_select(thd, &first->ref_pointer_array,
-			first->table_list.first,
-			first->with_wild, first->item_list,
-			first->where,
-			first->order_list.elements +
-			first->group_list.elements,
-			first->order_list.first,
-			first->group_list.first,
-			first->having,
-			thd->lex->proc_list.first,
-			first->options | thd->variables.option_bits | SELECT_DESCRIBE,
-			result, unit, first);
+    res= mysql_select(thd, 
+                      first->table_list.first,
+                      first->with_wild, first->item_list,
+                      first->where,
+                      first->order_list.elements + first->group_list.elements,
+                      first->order_list.first,
+                      first->group_list.first,
+                      first->having,
+                      thd->lex->proc_list.first,
+                      first->options | thd->variables.option_bits | SELECT_DESCRIBE,
+                      result, unit, first);
   }
   DBUG_RETURN(res || thd->is_error());
 }
@@ -24482,6 +24511,7 @@ static void print_table_array(THD *thd,
       str->append(STRING_WITH_LEN(" semi join "));
     else
       str->append(STRING_WITH_LEN(" join "));
+    
     curr->print(thd, eliminated_tables, str, query_type);
     if (curr->on_expr)
     {
@@ -25810,6 +25840,152 @@ err:
   DBUG_RETURN(0);
 }
 
+/****************************************************************************
+  AGGR_OP implementation
+****************************************************************************/
+
+/**
+  @brief Instantiate tmp table for aggregation and start index scan if needed
+  @todo Tmp table always would be created, even for empty result. Extend
+        executor to avoid tmp table creation when no rows were written
+        into tmp table.
+  @return
+    true  error
+    false ok
+*/
+
+bool
+AGGR_OP::prepare_tmp_table()
+{
+  TABLE *table= join_tab->table;
+  JOIN *join= join_tab->join;
+  int rc= 0;
+
+  if (!join_tab->table->is_created())
+  {
+    if (instantiate_tmp_table(table, join_tab->tmp_table_param->keyinfo,
+                              join_tab->tmp_table_param->start_recinfo,
+                              &join_tab->tmp_table_param->recinfo,
+                              join->select_options))
+      return true;
+    (void) table->file->extra(HA_EXTRA_WRITE_CACHE);
+    empty_record(table);
+  }
+  /* If it wasn't already, start index scan for grouping using table index. */
+  if (!table->file->inited && table->group &&
+      join_tab->tmp_table_param->sum_func_count && table->s->keys)
+    rc= table->file->ha_index_init(0, 0);
+  else
+  {
+    /* Otherwise start a sequential (rnd) scan of the tmp table */
+    rc= table->file->ha_rnd_init(true);
+  }
+  if (rc)
+  {
+    table->file->print_error(rc, MYF(0));
+    return true;
+  }
+  return false;
+}
+
+
+/**
+  @brief Prepare table if necessary and call write_func to save record
+
+  @param end_of_records  the end_of_record signal to pass to the writer
+
+  @return return one of enum_nested_loop_state.
+*/
+
+enum_nested_loop_state
+AGGR_OP::put_record(bool end_of_records)
+{
+  // Lazy tmp table creation/initialization; a failed setup aborts the write
+  if (!join_tab->table->file->inited && prepare_tmp_table())
+    return NESTED_LOOP_ERROR;
+  enum_nested_loop_state rc= (*write_func)(join_tab->join, join_tab,
+                                           end_of_records);
+  return rc;
+}
+
+
+/**
+  @brief Finish rnd/index scan after accumulating records, switch ref_array,
+         and send accumulated records further.
+  @return return one of enum_nested_loop_state.
+*/
+
+enum_nested_loop_state
+AGGR_OP::end_send()
+{
+  enum_nested_loop_state rc= NESTED_LOOP_OK;
+  TABLE *table= join_tab->table;
+  JOIN *join= join_tab->join;
+
+  // All records were stored, send them further
+  int tmp, new_errno= 0;
+
+  if ((rc= put_record(true)) < NESTED_LOOP_OK)
+    return rc;
+
+  if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
+  {
+    DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed"));
+    new_errno= tmp;
+  }
+  if ((tmp= table->file->ha_index_or_rnd_end()))
+  {
+    DBUG_PRINT("error",("ha_index_or_rnd_end() failed"));
+    new_errno= tmp;
+  }
+  if (new_errno)
+  {
+    table->file->print_error(new_errno,MYF(0));
+    return NESTED_LOOP_ERROR;
+  }
+
+  // Update ref array
+  join_tab->join->set_items_ref_array(*join_tab->ref_array);
+  if (join_tab->window_funcs)
+  {
+    join_tab->window_funcs->exec(join);
+  }
+
+  table->reginfo.lock_type= TL_UNLOCK;
+
+  bool in_first_read= true;
+  while (rc == NESTED_LOOP_OK)
+  {
+    int error;
+    if (in_first_read)
+    {
+      in_first_read= false;
+      error= join_init_read_record(join_tab);
+    }
+    else
+      error= join_tab->read_record.read_record(&join_tab->read_record);
+
+    if (error > 0 || (join->thd->is_error()))   // Fatal error
+      rc= NESTED_LOOP_ERROR;
+    else if (error < 0)
+      break;
+    else if (join->thd->killed)		  // Aborted by user
+    {
+      join->thd->send_kill_message();
+      rc= NESTED_LOOP_KILLED;
+    }
+    else
+      rc= evaluate_join_record(join, join_tab, 0);
+  }
+
+  // Finish rnd scan after sending records
+  if (join_tab->table->file->inited)
+    join_tab->table->file->ha_rnd_end();
+
+  return rc;
+}
+
+
 /**
   @} (end of group Query_Optimizer)
 */
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 87de9316c3a..f5b9cb4684d 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -34,6 +34,7 @@
 #include "opt_range.h"                /* SQL_SELECT, QUICK_SELECT_I */
 #include "filesort.h"
 
+typedef struct st_join_table JOIN_TAB;
 /* Values in optimize */
 #define KEY_OPTIMIZE_EXISTS		1
 #define KEY_OPTIMIZE_REF_OR_NULL	2
@@ -184,7 +185,7 @@ enum sj_strategy_enum
 
 typedef enum_nested_loop_state
 (*Next_select_func)(JOIN *, struct st_join_table *, bool);
-Next_select_func setup_end_select_func(JOIN *join);
+Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
 int rr_sequential(READ_RECORD *info);
 int rr_sequential_and_unpack(READ_RECORD *info);
 
@@ -198,9 +199,11 @@ int rr_sequential_and_unpack(READ_RECORD *info);
 class JOIN_CACHE;
 class SJ_TMP_TABLE;
 class JOIN_TAB_RANGE;
+class AGGR_OP;
+class Filesort;
 
 typedef struct st_join_table {
-  st_join_table() {}                          /* Remove gcc warning */
+  st_join_table() {}
   TABLE		*table;
   KEYUSE	*keyuse;			/**< pointer to first used key */
   KEY           *hj_key;       /**< descriptor of the used best hash join key
@@ -237,7 +240,6 @@ typedef struct st_join_table {
     For join tabs that are inside an SJM bush: root of the bush
   */
   st_join_table *bush_root_tab;
-  SORT_INFO *filesort;
 
   /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */
   bool          last_leaf_in_bush;
@@ -261,6 +263,7 @@ typedef struct st_join_table {
   */
   uint          packed_info;
 
+  //  READ_RECORD::Setup_func materialize_table;
   READ_RECORD::Setup_func read_first_record;
   Next_select_func next_select;
   READ_RECORD	read_record;
@@ -347,6 +350,7 @@ typedef struct st_join_table {
   */
   Item          *cache_idx_cond;
   SQL_SELECT    *cache_select;
+  AGGR_OP       *aggr;
   JOIN		*join;
   /*
     Embedding SJ-nest (may be not the direct parent), or NULL if none.
@@ -413,6 +417,46 @@ typedef struct st_join_table {
   /* NestedOuterJoins: Bitmap of nested joins this table is part of */
   nested_join_map embedding_map;
 
+  /* Tmp table info */
+  TMP_TABLE_PARAM *tmp_table_param;
+
+  /* Sorting related info */
+  Filesort *filesort;
+  SORT_INFO *filesort_result;
+  
+  /*
+    Non-NULL value means this join_tab must do window function computation
+    before reading.
+  */
+  Window_funcs_computation* window_funcs;
+
+  /**
+    List of topmost expressions in the select list. The *next* JOIN TAB
+    in the plan should use it to obtain correct values. Same applicable to
+    all_fields. These lists are needed because after tmp tables functions
+    will be turned to fields. These variables are pointing to
+    tmp_fields_list[123]. Valid only for tmp tables and the last non-tmp
+    table in the query plan.
+    @see JOIN::make_tmp_tables_info()
+  */
+  List<Item> *fields;
+  /** List of all expressions in the select list */
+  List<Item> *all_fields;
+  /*
+    Pointer to the ref array slice which to switch to before sending
+    records. Valid only for tmp tables.
+  */
+  Ref_ptr_array *ref_array;
+
+  /** Number of records saved in tmp table */
+  ha_rows send_records;
+
+  /** HAVING condition to check before saving a record into tmp table */
+  Item *having;
+
+  /** TRUE <=> remove duplicates on this table. */
+  bool distinct;
+
   /*
     Semi-join strategy to be used for this join table. This is a copy of
     POSITION::sj_strategy field. This field is set up by the
@@ -427,9 +471,9 @@ typedef struct st_join_table {
   void cleanup();
   inline bool is_using_loose_index_scan()
   {
-    return (select && select->quick &&
-            (select->quick->get_type() ==
-             QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX));
+    const SQL_SELECT *sel= filesort ? filesort->select : select;
+    return (sel && sel->quick &&
+            (sel->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX));
   }
   bool is_using_agg_loose_index_scan ()
   {
@@ -564,16 +608,22 @@ typedef struct st_join_table {
   void save_explain_data(Explain_table_access *eta, table_map prefix_tables, 
                          bool distinct, struct st_join_table *first_top_tab);
 
-  void update_explain_data(uint idx);
+  bool use_order() const; ///< Use ordering provided by chosen index?
+  bool sort_table();
+  bool remove_duplicates();
+
 } JOIN_TAB;
 
 
 #include "sql_join_cache.h"
 
-enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
-                                        end_of_records);
-enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
-                                  end_of_records);
+enum_nested_loop_state
+sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+enum_nested_loop_state 
+sub_select(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+enum_nested_loop_state
+sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+
 enum_nested_loop_state
 end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
 	       bool end_of_records);
@@ -868,12 +918,14 @@ typedef struct st_position
   Sj_materialization_picker sjmat_picker;
 } POSITION;
 
+typedef Bounds_checked_array<Item_null_result*> Item_null_array;
+
 typedef struct st_rollup
 {
   enum State { STATE_NONE, STATE_INITED, STATE_READY };
   State state;
-  Item_null_result **null_items;
-  Item ***ref_pointer_arrays;
+  Item_null_array null_items;
+  Ref_ptr_array *ref_pointer_arrays;
   List<Item> *fields;
 } ROLLUP;
 
@@ -887,6 +939,56 @@ public:
 
 class Pushdown_query;
 
+/**
+  @brief
+    Class to perform postjoin aggregation operations
+
+  @details
+    The result records are obtained on the put_record() call.
+    The aggregation process is determined by the write_func, it could be:
+      end_write          Simply store all records in tmp table.
+      end_write_group    Perform grouping using join->group_fields,
+                         records are expected to be sorted.
+      end_update         Perform grouping using the key generated on tmp
+                         table. Input records aren't expected to be sorted.
+                         Tmp table uses the heap engine
+      end_update_unique  Same as above, but the engine is myisam.
+
+    Lazy table initialization is used - the table will be instantiated and
+    rnd/index scan started on the first put_record() call.
+
+*/
+
+class AGGR_OP :public Sql_alloc
+{
+public:
+  JOIN_TAB *join_tab;
+
+  AGGR_OP(JOIN_TAB *tab) : join_tab(tab), write_func(NULL)
+  {};
+
+  enum_nested_loop_state put_record() { return put_record(false); };
+  /*
+    Send the result of operation further (to a next operation/client)
+    This function is called after all records were put into tmp table.
+
+    @return return one of enum_nested_loop_state values.
+  */
+  enum_nested_loop_state end_send();
+  /** write_func setter */
+  void set_write_func(Next_select_func new_write_func)
+  {
+    write_func= new_write_func;
+  }
+
+private:
+  /** Write function that would be used for saving records in tmp table. */
+  Next_select_func write_func;
+  enum_nested_loop_state put_record(bool end_of_records);
+  bool prepare_tmp_table();
+};
+
+
 class JOIN :public Sql_alloc
 {
 private:
@@ -955,33 +1057,11 @@ protected:
 
 public:
   JOIN_TAB *join_tab, **best_ref;
-  
-  /* 
-    Saved join_tab for pre_sorting. create_sort_index() will save here.. 
-  */
-  JOIN_TAB *pre_sort_join_tab;
-  uint pre_sort_index;
-  Item *pre_sort_idx_pushed_cond;
-  void clean_pre_sort_join_tab();
 
   /* List of fields that aren't under an aggregate function */
   List<Item_field> non_agg_fields;
 
-  /*
-    For "Using temporary+Using filesort" queries, JOIN::join_tab can point to
-    either: 
-    1. array of join tabs describing how to run the select, or
-    2. array of single join tab describing read from the temporary table.
-
-    SHOW EXPLAIN code needs to read/show #1. This is why two next members are
-    there for saving it.
-  */
-  JOIN_TAB *table_access_tabs;
-  uint     top_table_access_tabs_count;
-  
   JOIN_TAB **map2table;    ///< mapping between table indexes and JOIN_TABs
-  JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution
-
   List<JOIN_TAB_RANGE> join_tab_ranges;
   
   /*
@@ -1012,14 +1092,9 @@ public:
     We keep it here so that it is saved/restored with JOIN::restore_tmp.
   */
   uint     top_join_tab_count;
+  uint     aggr_tables;     ///< Number of post-join tmp tables 
   uint	   send_group_parts;
   /*
-    This counts how many times do_select() was invoked for this JOIN.
-    It's used to restrict Pushdown_query::execute() only to the first
-    do_select() invocation.
-  */
-  uint     do_select_call_count;
-  /*
     True if the query has GROUP BY.
     (that is, if group_by != NULL. when DISTINCT is converted into GROUP BY, it
      will set this, too. It is not clear why we need a separate var from 
@@ -1124,6 +1199,7 @@ public:
   */
   table_map complex_firstmatch_tables;
 
+  Next_select_func first_select;
   /*
     The cost of best complete join plan found so far during optimization,
     after optimization phase - cost of picked join order (not taking into
@@ -1139,9 +1215,6 @@ public:
   double   join_record_count;
   List<Item> *fields;
   List<Cached_item> group_fields, group_fields_cache;
-  TABLE    *tmp_table;
-  /// used to store 2 possible tmp table of SELECT
-  TABLE    *exec_tmp_table1, *exec_tmp_table2;
   THD	   *thd;
   Item_sum  **sum_funcs, ***sum_funcs_end;
   /** second copy of sumfuncs (for queries with 2 temporary tables */
@@ -1150,6 +1223,8 @@ public:
   Item	    *having;
   Item      *tmp_having; ///< To store having when processed temporary table
   Item      *having_history; ///< Store having for explain
+  ORDER     *group_list_for_estimates;
+  bool      having_is_correlated;
   ulonglong  select_options;
   /* 
     Bitmap of allowed types of the join caches that
@@ -1188,26 +1263,6 @@ public:
   */
   bool filesort_found_rows;
   
-  /**
-    Copy of this JOIN to be used with temporary tables.
-
-    tmp_join is used when the JOIN needs to be "reusable" (e.g. in a
-    subquery that gets re-executed several times) and we know will use
-    temporary tables for materialization. The materialization to a
-    temporary table overwrites the JOIN structure to point to the
-    temporary table after the materialization is done. This is where
-    tmp_join is used : it's a copy of the JOIN before the
-    materialization and is used in restoring before re-execution by
-    overwriting the current JOIN structure with the saved copy.
-    Because of this we should pay extra care of not freeing up helper
-    structures that are referenced by the original contents of the
-    JOIN. We can check for this by making sure the "current" join is
-    not the temporary copy, e.g.  !tmp_join || tmp_join != join
- 
-    We should free these sub-structures at JOIN::destroy() if the
-    "current" join has a copy is not that copy.
-  */
-  JOIN *tmp_join;
   ROLLUP rollup;				///< Used with rollup
   
   bool mixed_implicit_grouping;
@@ -1229,6 +1284,19 @@ public:
     GROUP/ORDER BY.
   */
   bool simple_order, simple_group;
+
+  /*
+    ordered_index_usage is set if an ordered index access
+    should be used instead of a filesort when computing 
+    ORDER/GROUP BY.
+  */
+  enum
+  {
+    ordered_index_void,       // No ordered index avail.
+    ordered_index_group_by,   // Use index for GROUP BY
+    ordered_index_order_by    // Use index for ORDER BY
+  } ordered_index_usage;
+
   /**
     Is set only in case if we have a GROUP BY clause
     and no ORDER BY after constant elimination of 'order'.
@@ -1281,10 +1349,19 @@ public:
   List<Item> exec_const_order_group_cond;
   SQL_SELECT *select;                ///<created in optimisation phase
   JOIN_TAB *return_tab;              ///<used only for outer joins
-  Item **ref_pointer_array; ///<used pointer reference for this select
-  // Copy of above to be used with different lists
-  Item **items0, **items1, **items2, **items3, **current_ref_pointer_array;
-  uint ref_pointer_array_size; ///< size of above in bytes
+
+  /*
+    Used pointer reference for this select.
+    select_lex->ref_pointer_array contains five "slices" of the same length:
+    |========|========|========|========|========|
+     ref_ptrs items0   items1   items2   items3
+   */
+  Ref_ptr_array ref_ptrs;
+  // Copy of the initial slice above, to be used with different lists
+  Ref_ptr_array items0, items1, items2, items3;
+  // Used by rollup, to restore ref_ptrs after overwriting it.
+  Ref_ptr_array current_ref_ptrs;
+
   const char *zero_result_cause; ///< not 0 if exec must return zero result
   
   bool union_part; ///< this subselect is part of union 
@@ -1311,20 +1388,12 @@ public:
   /* SJM nests that are executed with SJ-Materialization strategy */
   List<SJ_MATERIALIZATION_INFO> sjm_info_list;
 
-  /* 
-    storage for caching buffers allocated during query execution. 
-    These buffers allocations need to be cached as the thread memory pool is
-    cleared only at the end of the execution of the whole query and not caching
-    allocations that occur in repetition at execution time will result in 
-    excessive memory usage.
-    Note: make_simple_join always creates an execution plan that accesses
-    a single table, thus it is sufficient to have a one-element array for
-    table_reexec.
-  */  
-  SORT_FIELD *sortorder;                        // make_unireg_sortorder()
-  TABLE *table_reexec[1];                       // make_simple_join()
-  JOIN_TAB *join_tab_reexec;                    // make_simple_join()
-  /* end of allocation caching storage */
+  /** TRUE <=> ref_pointer_array is set to items3. */
+  bool set_group_rpa;
+  /** Exec time only: TRUE <=> current group has been sent */
+  bool group_sent;
+
+  JOIN_TAB *sort_and_group_aggr_tab;
 
   JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg,
        select_result *result_arg)
@@ -1336,12 +1405,13 @@ public:
   void init(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg,
        select_result *result_arg)
   {
-    join_tab= join_tab_save= 0;
+    join_tab= 0;
     table= 0;
     table_count= 0;
     top_join_tab_count= 0;
     const_tables= 0;
     const_table_map= 0;
+    aggr_tables= 0;
     eliminated_tables= 0;
     join_list= 0;
     implicit_grouping= FALSE;
@@ -1351,25 +1421,21 @@ public:
     send_records= 0;
     found_records= 0;
     fetch_limit= HA_POS_ERROR;
-    join_examined_rows= 0;
-    exec_tmp_table1= 0;
-    exec_tmp_table2= 0;
-    sortorder= 0;
-    table_reexec[0]= 0;
-    join_tab_reexec= 0;
     thd= thd_arg;
     sum_funcs= sum_funcs2= 0;
     procedure= 0;
     having= tmp_having= having_history= 0;
+    having_is_correlated= false;
+    group_list_for_estimates= 0;
     select_options= select_options_arg;
     result= result_arg;
     lock= thd_arg->lock;
     select_lex= 0; //for safety
-    tmp_join= 0;
     select_distinct= MY_TEST(select_options & SELECT_DISTINCT);
     no_order= 0;
     simple_order= 0;
     simple_group= 0;
+    ordered_index_usage= ordered_index_void;
     need_distinct= 0;
     skip_sort_order= 0;
     need_tmp= 0;
@@ -1377,8 +1443,11 @@ public:
     error= 0;
     select= 0;
     return_tab= 0;
-    ref_pointer_array= items0= items1= items2= items3= 0;
-    ref_pointer_array_size= 0;
+    ref_ptrs.reset();
+    items0.reset();
+    items1.reset();
+    items2.reset();
+    items3.reset();
     zero_result_cause= 0;
     optimized= 0;
     have_query_plan= QEP_NOT_PRESENT_YET;
@@ -1392,8 +1461,6 @@ public:
     positions= best_positions= 0;
     pushdown_query= 0;
     original_join_tab= 0;
-    do_select_call_count= 0;
-
     explain= NULL;
 
     all_fields= fields_arg;
@@ -1406,22 +1473,21 @@ public:
     rollup.state= ROLLUP::STATE_NONE;
 
     no_const_tables= FALSE;
+    first_select= sub_select;
+    set_group_rpa= false;
+    group_sent= 0;
+
     outer_ref_cond= pseudo_bits_cond= NULL;
     in_to_exists_where= NULL;
     in_to_exists_having= NULL;
-    pre_sort_join_tab= NULL;
     emb_sjm_nest= NULL;
     sjm_lookup_tables= 0;
-
-    /* 
-      The following is needed because JOIN::cleanup(true) may be called for 
-      joins for which JOIN::optimize was aborted with an error before a proper
-      query plan was produced
-    */
-    table_access_tabs= NULL; 
   }
 
-  int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
+  /* True if the plan guarantees that zero or one row will be returned */
+  bool only_const_tables()  { return const_tables == table_count; }
+
+  int prepare(TABLE_LIST *tables, uint wind_num,
 	      COND *conds, uint og_num, ORDER *order, bool skip_order_by,
               ORDER *group, Item *having, ORDER *proc_param, SELECT_LEX *select,
 	      SELECT_LEX_UNIT *unit);
@@ -1431,7 +1497,9 @@ public:
   int reinit();
   int init_execution();
   void exec();
+
   void exec_inner();
+  bool prepare_result(List<Item> **columns_list);
   int destroy();
   void restore_tmp();
   bool alloc_func_list();
@@ -1441,16 +1509,42 @@ public:
   bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
 			  bool before_group_by, bool recompute= FALSE);
 
-  inline void set_items_ref_array(Item **ptr)
+  /// Initializes a slice, see comments for ref_ptrs above.
+  Ref_ptr_array ref_ptr_array_slice(size_t slice_num)
   {
-    memcpy((char*) ref_pointer_array, (char*) ptr, ref_pointer_array_size);
-    current_ref_pointer_array= ptr;
+    size_t slice_sz= select_lex->ref_pointer_array.size() / 5U;
+    DBUG_ASSERT(select_lex->ref_pointer_array.size() % 5 == 0);
+    DBUG_ASSERT(slice_num < 5U);
+    return Ref_ptr_array(&select_lex->ref_pointer_array[slice_num * slice_sz],
+                         slice_sz);
   }
-  inline void init_items_ref_array()
+
+  /**
+     Overwrites one slice with the contents of another slice.
+     In the normal case, dst and src have the same size().
+     However: the rollup slices may have smaller size than slice_sz.
+   */
+  void copy_ref_ptr_array(Ref_ptr_array dst_arr, Ref_ptr_array src_arr)
+  {
+    DBUG_ASSERT(dst_arr.size() >= src_arr.size());
+    void *dest= dst_arr.array();
+    const void *src= src_arr.array();
+    memcpy(dest, src, src_arr.size() * src_arr.element_size());
+  }
+
+  /// Overwrites 'ref_ptrs' and remembers the source as 'current'.
+  void set_items_ref_array(Ref_ptr_array src_arr)
   {
-    items0= ref_pointer_array + all_fields.elements;
-    memcpy(items0, ref_pointer_array, ref_pointer_array_size);
-    current_ref_pointer_array= items0;
+    copy_ref_ptr_array(ref_ptrs, src_arr);
+    current_ref_ptrs= src_arr;
+  }
+
+  /// Initializes 'items0' and remembers that it is 'current'.
+  void init_items_ref_array()
+  {
+    items0= ref_ptr_array_slice(1);
+    copy_ref_ptr_array(items0, ref_ptrs);
+    current_ref_ptrs= items0;
   }
 
   bool rollup_init();
@@ -1459,18 +1553,10 @@ public:
 			  Item_sum ***func);
   int rollup_send_data(uint idx);
   int rollup_write_data(uint idx, TABLE *table);
-  /**
-    Release memory and, if possible, the open tables held by this execution
-    plan (and nested plans). It's used to release some tables before
-    the end of execution in order to increase concurrency and reduce
-    memory consumption.
-  */
   void join_free();
   /** Cleanup this JOIN, possibly for reuse */
   void cleanup(bool full);
   void clear();
-  bool save_join_tab();
-  bool init_save_join_tab();
   bool send_row_on_empty_set()
   {
     return (do_send_rows && implicit_grouping && !group_optimized_away &&
@@ -1489,6 +1575,8 @@ public:
     return (table_map(1) << table_count) - 1;
   }
   void drop_unused_derived_keys();
+  bool get_best_combination();
+  bool add_sorting_to_table(JOIN_TAB *tab, ORDER *order);
   inline void eval_select_list_used_tables();
   /* 
     Return the table for which an index scan can be used to satisfy 
@@ -1550,16 +1638,44 @@ public:
   int save_explain_data_intern(Explain_query *output, bool need_tmp_table,
                                bool need_order, bool distinct,
                                const char *message);
-  JOIN_TAB *first_breadth_first_optimization_tab() { return table_access_tabs; }
-  JOIN_TAB *first_breadth_first_execution_tab() { return join_tab; }
+  JOIN_TAB *first_breadth_first_tab() { return join_tab; }
 private:
   /**
+    Create a temporary table to be used for processing DISTINCT/ORDER
+    BY/GROUP BY.
+
+    @note Will modify JOIN object wrt sort/group attributes
+
+    @param tab              the JOIN_TAB object to attach created table to
+    @param tmp_table_fields List of items that will be used to define
+                            column types of the table.
+    @param tmp_table_group  Group key to use for temporary table, NULL if none.
+    @param save_sum_fields  If true, do not replace Item_sum items in 
+                            @c tmp_fields list with Item_field items referring 
+                            to fields in temporary table.
+
+    @returns false on success, true on failure
+  */
+  bool create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *tmp_table_fields,
+                                  ORDER *tmp_table_group,
+                                  bool save_sum_fields,
+                                  bool distinct,
+                                  bool keep_row_order);
+  /**
+    Optimize distinct when used on a subset of the tables.
+
+    E.g.,: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b
+    In this case we can stop scanning t2 when we have found one t1.a
+  */
+  void optimize_distinct();
+
+  /**
     TRUE if the query contains an aggregate function but has no GROUP
     BY clause. 
   */
   bool implicit_grouping; 
-  bool make_simple_join(JOIN *join, TABLE *tmp_table);
   void cleanup_item_list(List<Item> &items) const;
+  bool make_aggr_tables_info();
 };
 
 enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS};
@@ -1584,7 +1700,7 @@ extern const char *join_type_str[];
 void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, 
                        List<Item> &fields, bool reset_with_sum_func);
 bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
-		       Item **ref_pointer_array,
+		       Ref_ptr_array ref_pointer_array,
 		       List<Item> &new_list1, List<Item> &new_list2,
 		       uint elements, List<Item> &fields);
 void copy_fields(TMP_TABLE_PARAM *param);
@@ -1825,19 +1941,19 @@ int safe_index_read(JOIN_TAB *tab);
 int get_quick_record(SQL_SELECT *select);
 SORT_FIELD * make_unireg_sortorder(THD *thd, ORDER *order, uint *length,
                                   SORT_FIELD *sortorder);
-int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
+int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
 		List<Item> &fields, List <Item> &all_fields, ORDER *order);
-int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
+int setup_group(THD *thd,  Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
 		List<Item> &fields, List<Item> &all_fields, ORDER *order,
 		bool *hidden_group_fields);
 bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
-                   Item **ref_pointer_array);
+                    Ref_ptr_array ref_pointer_array);
 int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table,
                    struct st_table_ref *table_ref);
 
 bool handle_select(THD *thd, LEX *lex, select_result *result,
                    ulong setup_tables_done_option);
-bool mysql_select(THD *thd, Item ***rref_pointer_array,
+bool mysql_select(THD *thd,
                   TABLE_LIST *tables, uint wild_num,  List<Item> &list,
                   COND *conds, uint og_num, ORDER *order, ORDER *group,
                   Item *having, ORDER *proc_param, ulonglong select_type, 
@@ -2153,4 +2269,7 @@ public:
   int execute(JOIN *join);
 };
 
+bool test_if_order_compatible(SQL_I_List<ORDER> &a, SQL_I_List<ORDER> &b);
+int test_if_group_changed(List<Cached_item> &list);
+int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort);
 #endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index dad51139af3..583058f80a5 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -9351,7 +9351,6 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
   int error= 1;
   Copy_field *copy= NULL, *copy_end;
   ha_rows found_count= 0, delete_count= 0;
-  SORT_FIELD *sortorder;
   SORT_INFO  *file_sort= 0;
   READ_RECORD info;
   TABLE_LIST   tables;
@@ -9441,7 +9440,6 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
     }
     else
     {
-      uint length= 0;
       bzero((char *) &tables, sizeof(tables));
       tables.table= from;
       tables.alias= tables.table_name= from->s->table_name.str;
@@ -9449,14 +9447,13 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
 
       THD_STAGE_INFO(thd, stage_sorting);
       Filesort_tracker dummy_tracker(false);
+      Filesort fsort(order, HA_POS_ERROR, NULL);
       if (thd->lex->select_lex.setup_ref_array(thd, order_num) ||
           setup_order(thd, thd->lex->select_lex.ref_pointer_array,
-                      &tables, fields, all_fields, order) ||
-          !(sortorder= make_unireg_sortorder(thd, order, &length, NULL)) ||
-          !(file_sort= filesort(thd, from, sortorder, length,
-                               NULL, HA_POS_ERROR,
-                               true,
-                                &dummy_tracker)))
+                      &tables, fields, all_fields, order))
+        goto err;
+
+      if (!(file_sort= filesort(thd, from, &fsort, true, &dummy_tracker)))
         goto err;
     }
     thd_progress_next_stage(thd);
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index 5685c90850a..87b836f40d9 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -436,8 +436,7 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
 
     can_skip_order_by= is_union_select && !(sl->braces && sl->explicit_limit);
 
-    saved_error= join->prepare(&sl->ref_pointer_array,
-                               sl->table_list.first,
+    saved_error= join->prepare(sl->table_list.first,
                                sl->with_wild,
                                sl->where,
                                (can_skip_order_by ? 0 :
@@ -646,8 +645,7 @@ bool st_select_lex_unit::prepare(THD *thd_arg, select_result *sel_result,
         fake_select_lex->n_child_sum_items+= global_parameters()->n_sum_items;
 
 	saved_error= fake_select_lex->join->
-	  prepare(&fake_select_lex->ref_pointer_array,
-		  fake_select_lex->table_list.first,
+	  prepare(fake_select_lex->table_list.first,
 		  0, 0,
                   global_parameters()->order_list.elements, // og_num
                   global_parameters()->order_list.first,    // order
@@ -702,7 +700,7 @@ bool st_select_lex_unit::optimize()
       {
         item->assigned(0); // We will reinit & rexecute unit
         item->reset();
-        if (table->created)
+        if (table->is_created())
         {
           table->file->ha_delete_all_rows();
           table->file->info(HA_STATUS_VARIABLE);
@@ -946,13 +944,13 @@ bool st_select_lex_unit::exec()
           Don't add more sum_items if we have already done JOIN::prepare
           for this (with a different join object)
         */
-        if (!fake_select_lex->ref_pointer_array)
+        if (fake_select_lex->ref_pointer_array.is_null())
           fake_select_lex->n_child_sum_items+= global_parameters()->n_sum_items;
         
         if (!was_executed)
           save_union_explain_part2(thd->lex->explain);
 
-        saved_error= mysql_select(thd, &fake_select_lex->ref_pointer_array,
+        saved_error= mysql_select(thd,
                               &result_table_list,
                               0, item_list, NULL,
 				  global_parameters()->order_list.elements,
@@ -975,7 +973,7 @@ bool st_select_lex_unit::exec()
             to reset them back, we re-do all of the actions (yes it is ugly):
           */ // psergey-todo: is the above really necessary anymore?? 
 	  join->init(thd, item_list, fake_select_lex->options, result);
-          saved_error= mysql_select(thd, &fake_select_lex->ref_pointer_array,
+          saved_error= mysql_select(thd,
                                 &result_table_list,
                                 0, item_list, NULL,
 				    global_parameters()->order_list.elements,
@@ -1022,27 +1020,11 @@ bool st_select_lex_unit::cleanup()
   }
   cleaned= 1;
 
-  if (union_result)
-  {
-    delete union_result;
-    union_result=0; // Safety
-    if (table)
-      free_tmp_table(thd, table);
-    table= 0; // Safety
-  }
-
   for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
     error|= sl->cleanup();
 
   if (fake_select_lex)
   {
-    JOIN *join;
-    if ((join= fake_select_lex->join))
-    {
-      join->tables_list= 0;
-      join->table_count= 0;
-      join->top_join_tab_count= 0;
-    }
     error|= fake_select_lex->cleanup();
     /*
       There are two cases when we should clean order items:
@@ -1064,6 +1046,15 @@ bool st_select_lex_unit::cleanup()
     }
   }
 
+  if (union_result)
+  {
+    delete union_result;
+    union_result=0; // Safety
+    if (table)
+      free_tmp_table(thd, table);
+    table= 0; // Safety
+  }
+
   DBUG_RETURN(error);
 }
 
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 61c16a905fe..739bef82ab2 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -341,7 +341,8 @@ int mysql_update(THD *thd,
   if (table_list->is_view())
     unfix_fields(fields);
 
-  if (setup_fields_with_no_wrap(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0))
+  if (setup_fields_with_no_wrap(thd, Ref_ptr_array(),
+                                fields, MARK_COLUMNS_WRITE, 0, 0))
     DBUG_RETURN(1);                     /* purecov: inspected */
   if (table_list->view && check_fields(thd, fields))
   {
@@ -360,7 +361,7 @@ int mysql_update(THD *thd,
   table_list->grant.want_privilege= table->grant.want_privilege=
     (SELECT_ACL & ~table->grant.privilege);
 #endif
-  if (setup_fields(thd, 0, values, MARK_COLUMNS_READ, 0, 0))
+  if (setup_fields(thd, Ref_ptr_array(), values, MARK_COLUMNS_READ, 0, 0))
   {
     free_underlaid_joins(thd, select_lex);
     DBUG_RETURN(1);				/* purecov: inspected */
@@ -557,17 +558,12 @@ int mysql_update(THD *thd,
 	to update
         NOTE: filesort will call table->prepare_for_position()
       */
-      uint         length= 0;
-      SORT_FIELD  *sortorder;
+      Filesort fsort(order, limit, select);
 
       Filesort_tracker *fs_tracker= 
         thd->lex->explain->get_upd_del_plan()->filesort_tracker;
 
-      if (!(sortorder=make_unireg_sortorder(thd, order, &length, NULL)) ||
-          !(file_sort= filesort(thd, table, sortorder, length,
-                                select, limit,
-                                true,
-                                fs_tracker)))
+      if (!(file_sort= filesort(thd, table, &fsort, true, fs_tracker)))
 	goto err;
       thd->inc_examined_row_count(file_sort->examined_rows);
 
@@ -696,7 +692,7 @@ int mysql_update(THD *thd,
       if (error >= 0)
 	goto err;
     }
-    table->disable_keyread();
+    table->set_keyread(false);
     table->column_bitmaps_set(save_read_set, save_write_set);
   }
 
@@ -1050,7 +1046,7 @@ err:
   delete select;
   delete file_sort;
   free_underlaid_joins(thd, select_lex);
-  table->disable_keyread();
+  table->set_keyread(false);
   thd->abort_on_warning= 0;
   DBUG_RETURN(1);
 
@@ -1424,7 +1420,8 @@ int mysql_multi_update_prepare(THD *thd)
   if (lex->select_lex.handle_derived(thd->lex, DT_MERGE))  
     DBUG_RETURN(TRUE);
 
-  if (setup_fields_with_no_wrap(thd, 0, *fields, MARK_COLUMNS_WRITE, 0, 0))
+  if (setup_fields_with_no_wrap(thd, Ref_ptr_array(),
+                                *fields, MARK_COLUMNS_WRITE, 0, 0))
     DBUG_RETURN(TRUE);
 
   for (tl= table_list; tl ; tl= tl->next_local)
@@ -1611,7 +1608,7 @@ bool mysql_multi_update(THD *thd,
   thd->abort_on_warning= thd->is_strict_mode();
   List<Item> total_list;
 
-  res= mysql_select(thd, &select_lex->ref_pointer_array,
+  res= mysql_select(thd,
                     table_list, select_lex->with_wild,
                     total_list,
                     conds, 0, (ORDER *) NULL, (ORDER *)NULL, (Item *) NULL,
@@ -1707,7 +1704,8 @@ int multi_update::prepare(List<Item> &not_used_values,
     reference tables
   */
 
-  int error= setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0);
+  int error= setup_fields(thd, Ref_ptr_array(),
+                          *values, MARK_COLUMNS_READ, 0, 0);
 
   ti.rewind();
   while ((table_ref= ti++))
@@ -2034,7 +2032,7 @@ loop_end:
 
     /* Make an unique key over the first field to avoid duplicated updates */
     bzero((char*) &group, sizeof(group));
-    group.asc= 1;
+    group.direction= ORDER::ORDER_ASC;
     group.item= (Item**) temp_fields.head_ref();
 
     tmp_param->quick_group=1;
diff --git a/sql/sql_window.cc b/sql/sql_window.cc
new file mode 100644
index 00000000000..ef7f512f7fc
--- /dev/null
+++ b/sql/sql_window.cc
@@ -0,0 +1,1737 @@
+#include "sql_select.h"
+#include "item_windowfunc.h"
+#include "filesort.h"
+#include "sql_base.h"
+#include "sql_window.h"
+
+/* Check name and window reference of this spec; true= error was reported */
+bool
+Window_spec::check_window_names(List_iterator_fast<Window_spec> &it)
+{
+  char *name= this->name();
+  char *ref_name= window_reference();
+  it.rewind();
+  Window_spec *win_spec;
+  while((win_spec= it++) && win_spec != this)  // only specs listed before us
+  {
+    char *win_spec_name= win_spec->name();
+    if (!win_spec_name)                // stop at the first unnamed spec
+      break;
+    if (name && my_strcasecmp(system_charset_info, name, win_spec_name) == 0)
+    {                                  // our name matches an earlier spec
+      my_error(ER_DUP_WINDOW_NAME, MYF(0), name);
+      return true;
+    }
+    if (ref_name &&
+        my_strcasecmp(system_charset_info, ref_name, win_spec_name) == 0)
+    {                                  // win_spec is the window we refer to
+      if (partition_list->elements)    // we have a partition list of our own
+      {
+        my_error(ER_PARTITION_LIST_IN_REFERENCING_WINDOW_SPEC, MYF(0),
+                 ref_name);
+        return true;
+      }
+      if (win_spec->order_list->elements && order_list->elements)
+      {                                // both specs carry an order list
+        my_error(ER_ORDER_LIST_IN_REFERENCING_WINDOW_SPEC, MYF(0), ref_name);
+        return true;              
+      } 
+      if (win_spec->window_frame)      // referenced spec has a frame
+      {
+        my_error(ER_WINDOW_FRAME_IN_REFERENCED_WINDOW_SPEC, MYF(0), ref_name);
+        return true;              
+      }
+      referenced_win_spec= win_spec;
+      if (partition_list->elements == 0)  // take over the lists we lack
+        partition_list= win_spec->partition_list;
+      if (order_list->elements == 0)
+        order_list= win_spec->order_list;
+    }
+  }
+  if (ref_name && !referenced_win_spec)  // no referenced spec was recorded
+  {
+    my_error(ER_WRONG_WINDOW_SPEC_NAME, MYF(0), ref_name);
+    return true;              
+  }
+  return false;
+}
+
+bool
+Window_frame::check_frame_bounds()
+{                  // returns true, after reporting an error, for a bad pair
+  if ((top_bound->is_unbounded() &&          // top: unbounded + FOLLOWING
+       top_bound->precedence_type == Window_frame_bound::FOLLOWING) ||
+      (bottom_bound->is_unbounded() &&       // bottom: unbounded + PRECEDING
+       bottom_bound->precedence_type == Window_frame_bound::PRECEDING) ||
+      (top_bound->precedence_type == Window_frame_bound::CURRENT &&
+       bottom_bound->precedence_type == Window_frame_bound::PRECEDING) ||
+      (bottom_bound->precedence_type == Window_frame_bound::CURRENT &&
+       top_bound->precedence_type == Window_frame_bound::FOLLOWING))
+  {                // also: CURRENT top with PRECEDING bottom, and vice versa
+    my_error(ER_BAD_COMBINATION_OF_WINDOW_FRAME_BOUND_SPECS, MYF(0));
+    return true;              
+  }
+
+  return false;    // no other combination is rejected by this check
+}
+
+
+/*
+  Setup window functions in a select: check every window specification and
+  call update_used_tables() for each window function. Returns 0 or 1 (error).
+*/
+int
+setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
+	      List<Item> &fields, List<Item> &all_fields, 
+              List<Window_spec> &win_specs, List<Item_window_func> &win_funcs)
+{
+  Window_spec *win_spec;
+  DBUG_ENTER("setup_windows");
+  List_iterator<Window_spec> it(win_specs);
+
+  /* 
+    Move all unnamed specifications after the named ones.
+    We could have avoided it if we had built two separate lists for
+    named and unnamed specifications.
+  */
+  Query_arena *arena, backup;
+  arena= thd->activate_stmt_arena_if_needed(&backup);  // undone right below
+  uint i = 0;
+  uint elems= win_specs.elements;      // size before anything is re-appended
+  while ((win_spec= it++) && i++ < elems)  // visit at most elems entries
+  {
+    if (win_spec->name() == NULL)
+    {
+      it.remove();                     // unlink the unnamed spec ...
+      win_specs.push_back(win_spec);   // ... and re-append it at the tail
+    }
+  }
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+
+  it.rewind();
+
+  List_iterator_fast<Window_spec> itp(win_specs);  // for check_window_names()
+    
+  while ((win_spec= it++))
+  {
+    bool hidden_group_fields;          // filled by setup_group(), not read here
+    if (win_spec->check_window_names(itp) ||
+        setup_group(thd, ref_pointer_array, tables, fields, all_fields,
+                    win_spec->partition_list->first, &hidden_group_fields) ||
+        setup_order(thd, ref_pointer_array, tables, fields, all_fields,
+                    win_spec->order_list->first) ||
+        (win_spec->window_frame && 
+         win_spec->window_frame->check_frame_bounds()))
+    {
+      DBUG_RETURN(1);                  // first failing step aborts the setup
+    }
+    
+    if (win_spec->window_frame &&
+        win_spec->window_frame->exclusion != Window_frame::EXCL_NONE)
+    {
+      my_error(ER_FRAME_EXCLUSION_NOT_SUPPORTED, MYF(0));
+      DBUG_RETURN(1);
+    }
+    /*
+       For  "win_func() OVER (ORDER BY order_list RANGE BETWEEN ...)",
+       - ORDER BY order_list must not be omitted
+       - the list must have a single element.
+    */
+    if (win_spec->window_frame && 
+        win_spec->window_frame->units == Window_frame::UNITS_RANGE)
+    {
+      if (win_spec->order_list->elements != 1)
+      {
+        my_error(ER_RANGE_FRAME_NEEDS_SIMPLE_ORDERBY, MYF(0));
+        DBUG_RETURN(1);
+      }
+
+      /*
+        "The declared type of SK shall be numeric, datetime, or interval"
+        we don't support datetime or interval, yet.
+      */
+      Item_result rtype= win_spec->order_list->first->item[0]->result_type();
+      if (rtype != REAL_RESULT && rtype != INT_RESULT && 
+          rtype != DECIMAL_RESULT)
+      {
+        my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0));
+        DBUG_RETURN(1);
+      }
+
+      /*
+        "The declared type of UVS shall be numeric if the declared type of SK 
+        is numeric; otherwise, it shall be an interval type that may be added
+        to or subtracted from the declared type of SK"
+      */
+      Window_frame_bound *bounds[]= {win_spec->window_frame->top_bound,
+                                     win_spec->window_frame->bottom_bound,
+                                     NULL};  // NULL terminates the loop below
+      for (Window_frame_bound **pbound= &bounds[0]; *pbound; pbound++)
+      {
+        if (!(*pbound)->is_unbounded() &&
+            ((*pbound)->precedence_type == Window_frame_bound::FOLLOWING ||
+             (*pbound)->precedence_type == Window_frame_bound::PRECEDING))
+        {                              // only such bounds have offset checked
+          Item_result rtype= (*pbound)->offset->result_type();  // shadows rtype
+          if (rtype != REAL_RESULT && rtype != INT_RESULT && 
+              rtype != DECIMAL_RESULT)
+          {
+            my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0));
+            DBUG_RETURN(1);
+          }
+        }
+      }
+    }
+    
+    /* "ROWS PRECEDING|FOLLOWING $n" must have a numeric $n */
+    if (win_spec->window_frame && 
+        win_spec->window_frame->units == Window_frame::UNITS_ROWS)
+    {
+      Window_frame_bound *bounds[]= {win_spec->window_frame->top_bound,
+                                     win_spec->window_frame->bottom_bound,
+                                     NULL};  // NULL terminates the loop below
+      for (Window_frame_bound **pbound= &bounds[0]; *pbound; pbound++)
+      {
+        if (!(*pbound)->is_unbounded() &&
+            ((*pbound)->precedence_type == Window_frame_bound::FOLLOWING ||
+             (*pbound)->precedence_type == Window_frame_bound::PRECEDING))
+        {
+          Item *offset= (*pbound)->offset;
+          if (offset->result_type() != INT_RESULT)  // only INT_RESULT passes
+          {
+            my_error(ER_WRONG_TYPE_FOR_ROWS_FRAME, MYF(0));
+            DBUG_RETURN(1);
+          }
+        }
+      }
+    }
+  }
+
+  List_iterator_fast<Item_window_func> li(win_funcs);
+  Item_window_func *win_func_item;
+  while ((win_func_item= li++))        // runs only if all specs passed
+  {
+    win_func_item->update_used_tables();
+  }
+  
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Do a pass over sorted table and compute window function values.
+  Returns true if updating a row failed, false otherwise.
+  This function is for handling window functions that can be computed on the
+  fly. Examples are RANK() and ROW_NUMBER().
+*/
+bool compute_window_func_values(Item_window_func *item_win, 
+                                TABLE *tbl, READ_RECORD *info)
+{
+  int err;
+  while (!(err=info->read_record(info)))  // any non-zero result ends the pass
+  {
+    store_record(tbl,record[1]);       // keep the row as read, for the update
+    
+    /* 
+      This will cause window function to compute its value for the
+      current row :
+    */
+    item_win->advance_window();
+
+    /*
+      Put the new value into temptable's field
+      TODO: Should this use item_win->update_field() call?
+      Regular aggregate function implementations seem to implement it.
+    */
+    item_win->save_in_field(item_win->result_field, true);
+    err= tbl->file->ha_update_row(tbl->record[1], tbl->record[0]);
+    if (err && err != HA_ERR_RECORD_IS_THE_SAME)  // "same record" is tolerated
+      return true;
+  }
+  return false;  // NOTE(review): read errors are not told apart from end of data
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// Window Frames support 
+/////////////////////////////////////////////////////////////////////////////
+
+// note: make rr_from_pointers static again once it is no longer needed here
+int rr_from_pointers(READ_RECORD *info);
+
+/*
+  A temporary way to clone READ_RECORD structures until Monty provides the real
+  one. Makes a bitwise copy of *src into *dst and always returns false.
+*/
+bool clone_read_record(const READ_RECORD *src, READ_RECORD *dst)
+{
+  //DBUG_ASSERT(src->table->sort.record_pointers);
+  DBUG_ASSERT(src->read_record == rr_from_pointers);  // only this reader is ok
+  memcpy(dst, src, sizeof(READ_RECORD));  // pointers inside are copied as-is
+  return false;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+
+/*
+  A cursor over a sequence of rowids. One can
+   - Move to next rowid
+   - jump to given number in the sequence
+   - Know the number of the current rowid (i.e. how many rowids have been read)
+*/
+
+class Rowid_seq_cursor
+{
+  uchar *cache_start;   // position of the first rowid, as captured by init()
+  uchar *cache_pos;     // position of the rowid the next get_next() consumes
+  uchar *cache_end;     // get_next() reports EOF once cache_pos reaches this
+  uint ref_length;      // step in bytes between two consecutive rowids
+
+public:
+  void init(READ_RECORD *info)         // take over info's current cache window
+  {
+    cache_start= info->cache_pos;
+    cache_pos=   info->cache_pos;
+    cache_end=   info->cache_end;
+    ref_length= info->ref_length;
+  }
+
+  virtual int get_next()               // 0 = moved one rowid ahead, -1 = EOF
+  {
+    /* Allow multiple get_next() calls in EOF state*/
+    if (cache_pos == cache_end)
+      return -1;
+    cache_pos+= ref_length;
+    return 0;
+  }
+  
+  ha_rows get_rownum()                 // number of rowids between start and pos
+  {
+    return (cache_pos - cache_start) / ref_length;
+  }
+
+  // will be called by ROWS n FOLLOWING to catch up.
+  void move_to(ha_rows row_number)     // no check against cache_end is made
+  {
+    cache_pos= cache_start + row_number * ref_length;
+  }
+protected:
+  bool at_eof() { return (cache_pos == cache_end); }
+
+  uchar *get_last_rowid()              // rowid just before cache_pos, or NULL
+  {
+    if (cache_pos == cache_start)      // nothing precedes the start
+      return NULL;
+    else
+      return cache_pos - ref_length;
+  }
+
+  uchar *get_curr_rowid() { return cache_pos; }
+};
+
+
+/*
+  Cursor which reads from rowid sequence and also retrieves table rows.
+*/
+
+class Table_read_cursor : public Rowid_seq_cursor
+{
+  /* 
+    Note: we don't own *read_record, somebody else is using it.
+    We only look at the constant part of it, e.g. table, record buffer, etc.
+  */
+  READ_RECORD *read_record;
+public:
+
+  void init(READ_RECORD *info)         // hides Rowid_seq_cursor::init()
+  {
+    Rowid_seq_cursor::init(info);
+    read_record= info;
+  }
+
+  virtual int get_next()               // 0 = row was read; non-zero otherwise
+  {
+    if (at_eof())
+      return -1;
+
+    uchar* curr_rowid= get_curr_rowid();  // remember it before advancing
+    int res= Rowid_seq_cursor::get_next();
+    if (!res)
+    {                                  // fetch the row for the saved rowid
+      res= read_record->table->file->ha_rnd_pos(read_record->record,
+                                                curr_rowid);
+    }
+    return res;                        // may be the ha_rnd_pos() error code
+  }
+
+  bool restore_last_row()              // re-read the row before cache_pos
+  {
+    uchar *p;
+    if ((p= get_last_rowid()))         // NULL when cursor is at the start
+    {
+      int rc= read_record->table->file->ha_rnd_pos(read_record->record, p);
+      if (!rc)
+        return true; // restored ok
+    }
+    return false; // didn't restore
+  }
+
+  // todo: should move_to() also read row here? 
+};
+
+/*
+  TODO: We should also have a cursor that reads table rows and 
+  stays within the current partition.
+*/
+
+/////////////////////////////////////////////////////////////////////////////
+
+
+/*
+  Window frame bound cursor. Abstract interface.
+  
+  @detail
+    The cursor moves within the partition that the current row is in.
+    It may be ahead or behind the current row.
+
+    The cursor also assumes that the current row moves forward through the
+    partition and will move to the next adjacent partition after this one.
+
+  @todo
+  - if we want to allocate this on the MEM_ROOT we should make sure 
+    it is not re-allocated for every subquery execution.
+*/
+
+class Frame_cursor : public Sql_alloc
+{
+public:
+  virtual void init(THD *thd, READ_RECORD *info, 
+                    SQL_I_List<ORDER> *partition_list,
+                    SQL_I_List<ORDER> *order_list)
+  {}                                   // default: nothing to set up
+
+  /*
+    Current row has moved to the next partition and is positioned on the first
+    row there. Position the frame bound accordingly.
+
+    @param rownum  -  NOTE(review): looks like the number of that first row in
+                      the rowid sequence; confirm against the caller.
+    @param item    -  Put or remove rows from there.
+    @detail
+      There is no "first" parameter; NOTE(review): likely means rownum==0.
+      - if first==false, the caller guarantees that tbl->record[0] points at the
+        first row in the new partition.
+      - if first==true, we are just starting in the first partition and no such
+        guarantee is provided.
+      - The callee may move tbl->file and tbl->record[0] to point to some other
+        row.
+  */
+  virtual void pre_next_partition(longlong rownum, Item_sum* item){};  // no-op
+  virtual void next_partition(longlong rownum, Item_sum* item)=0;
+  
+  /*
+    The current row has moved one row forward.
+    Move this frame bound accordingly, and update the value of aggregate
+    function as necessary.
+  */
+  virtual void pre_next_row(Item_sum* item){};  // optional hook, no-op here
+  virtual void next_row(Item_sum* item)=0;
+  
+  virtual ~Frame_cursor(){}
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// RANGE-type frames
+//////////////////////////////////////////////////////////////////////////////
+
+/*
+  Frame_range_n_top handles the top end of RANGE-type frame.
+
+  That is, it handles:
+    RANGE BETWEEN n PRECEDING AND ...
+    RANGE BETWEEN n FOLLOWING AND ...
+
+  Top of the frame doesn't need to check for partition end, since bottom will
+  reach it before.
+*/
+
+class Frame_range_n_top : public Frame_cursor
+{
+  Table_read_cursor cursor;
+
+  Cached_item_item *range_expr;        // created in init(); holds item_add value
+
+  Item *n_val;                         // the "n" of n PRECEDING / n FOLLOWING
+  Item *item_add;                      // src_expr - n_val or src_expr + n_val
+
+  const bool is_preceding;
+  /*
+     1  when order_list uses ASC ordering
+    -1  when order_list uses DESC ordering
+  */
+  int order_direction;
+public:
+  Frame_range_n_top(bool is_preceding_arg, Item *n_val_arg) :
+    n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg)
+  {}                                   // the other members are set in init()
+
+  void init(THD *thd, READ_RECORD *info,
+            SQL_I_List<ORDER> *partition_list,
+            SQL_I_List<ORDER> *order_list)
+  {
+    cursor.init(info);
+
+    DBUG_ASSERT(order_list->elements == 1);
+    Item *src_expr= order_list->first->item[0];  // the single ORDER BY item
+    if (order_list->first->direction == ORDER::ORDER_ASC)
+      order_direction= 1;
+    else
+      order_direction= -1;
+
+    range_expr= (Cached_item_item*) new_Cached_item(thd, src_expr, FALSE);
+
+    bool use_minus= is_preceding;      // PRECEDING subtracts n ...
+    if (order_direction == -1)         // ... unless the ordering is DESC
+      use_minus= !use_minus;
+
+    if (use_minus)
+      item_add= new (thd->mem_root) Item_func_minus(thd, src_expr, n_val);
+    else
+      item_add= new (thd->mem_root) Item_func_plus(thd, src_expr, n_val);
+
+    item_add->fix_fields(thd, &item_add);  // NOTE(review): results unchecked
+  }
+
+  void pre_next_partition(longlong rownum, Item_sum* item)
+  {
+    // Save the value of FUNC(current_row)
+    range_expr->fetch_value_from(item_add);
+  }
+
+  void next_partition(longlong rownum, Item_sum* item)
+  {
+    cursor.move_to(rownum);            // restart the scan at row rownum
+    walk_till_non_peer(item);
+  }
+
+  void pre_next_row(Item_sum* item)
+  {
+    range_expr->fetch_value_from(item_add);  // refresh the cached value
+  }
+
+  void next_row(Item_sum* item)
+  {
+    /*
+      Ok, our cursor is at the first row R where
+        (prev_row + n) >= R
+      We need to check about the current row.
+    */
+    if (cursor.restore_last_row())     // false if there is no such row
+    {
+      if (order_direction * range_expr->cmp_read_only() <= 0)
+        return;                        // that row is kept; bound stays put
+      item->remove();
+    }
+    walk_till_non_peer(item);
+  }
+
+private:
+  void walk_till_non_peer(Item_sum* item)  // remove rows until one is kept
+  {
+    while (!cursor.get_next())         // also stops when get_next() fails
+    {
+      if (order_direction * range_expr->cmp_read_only() <= 0)
+        break;                         // row just read is not removed
+      item->remove();
+    }
+  }
+};
+
+
+/*
+  Frame_range_n_bottom handles bottom end of RANGE-type frame.
+
+  That is, it handles frame bounds in form:
+    RANGE BETWEEN ... AND n PRECEDING
+    RANGE BETWEEN ... AND n FOLLOWING
+
+  Bottom end moves first so it needs to check for partition end
+  (todo: unless it's PRECEDING and in that case it doesn't)
+  (todo: factor out common parts with Frame_range_n_top into
+   a common ancestor)
+*/
+
+class Frame_range_n_bottom: public Frame_cursor
+{
+  Table_read_cursor cursor;
+
+  Cached_item_item *range_expr;        // created in init(); holds item_add value
+
+  Item *n_val;                         // the "n" of n PRECEDING / n FOLLOWING
+  Item *item_add;                      // src_expr - n_val or src_expr + n_val
+
+  const bool is_preceding;
+
+  Group_bound_tracker bound_tracker;   // init()-ed with partition_list
+  bool end_of_partition;               // first written in pre_next_partition()
+
+  /*
+     1  when order_list uses ASC ordering
+    -1  when order_list uses DESC ordering
+  */
+  int order_direction;
+public:
+  Frame_range_n_bottom(bool is_preceding_arg, Item *n_val_arg) :
+    n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg)
+  {}                                   // the other members are set later
+
+  void init(THD *thd, READ_RECORD *info,
+            SQL_I_List<ORDER> *partition_list,
+            SQL_I_List<ORDER> *order_list)
+  {
+    cursor.init(info);
+
+    DBUG_ASSERT(order_list->elements == 1);
+    Item *src_expr= order_list->first->item[0];  // the single ORDER BY item
+
+    if (order_list->first->direction == ORDER::ORDER_ASC)
+      order_direction= 1;
+    else
+      order_direction= -1;
+
+    range_expr= (Cached_item_item*) new_Cached_item(thd, src_expr, FALSE);
+
+    bool use_minus= is_preceding;      // PRECEDING subtracts n ...
+    if (order_direction == -1)         // ... unless the ordering is DESC
+      use_minus= !use_minus;
+
+    if (use_minus)
+      item_add= new (thd->mem_root) Item_func_minus(thd, src_expr, n_val);
+    else
+      item_add= new (thd->mem_root) Item_func_plus(thd, src_expr, n_val);
+
+    item_add->fix_fields(thd, &item_add);  // NOTE(review): results unchecked
+
+    bound_tracker.init(thd, partition_list);
+  }
+
+  void pre_next_partition(longlong rownum, Item_sum* item)
+  {
+    // Save the value of FUNC(current_row)
+    range_expr->fetch_value_from(item_add);
+
+    bound_tracker.check_if_next_group();  // result ignored here
+    end_of_partition= false;
+  }
+
+  void next_partition(longlong rownum, Item_sum* item)
+  {
+    cursor.move_to(rownum);            // restart the scan at row rownum
+    walk_till_non_peer(item);
+  }
+
+  void pre_next_row(Item_sum* item)
+  {
+    if (end_of_partition)              // bound is done for this partition
+      return;
+    range_expr->fetch_value_from(item_add);
+  }
+
+  void next_row(Item_sum* item)
+  {
+    if (end_of_partition)              // bound is done for this partition
+      return;
+    /*
+      Ok, our cursor is at the first row R where
+        (prev_row + n) >= R
+      We need to check about the current row.
+    */
+    if (cursor.restore_last_row())     // false if there is no such row
+    {
+      if (order_direction * range_expr->cmp_read_only() < 0)
+        return;                        // that row is not added yet
+      item->add();
+    }
+    walk_till_non_peer(item);
+  }
+
+private:
+  void walk_till_non_peer(Item_sum* item)  // add rows until one is rejected
+  {
+    int res;
+    while (!(res= cursor.get_next()))
+    {
+      if (bound_tracker.check_if_next_group())
+      {                                // row just read starts another group
+        end_of_partition= true;
+        break;
+      }
+      if (order_direction * range_expr->cmp_read_only() < 0)
+        break;                         // row just read is not added
+      item->add();
+    }
+    if (res)                           // get_next() returned non-zero
+      end_of_partition= true;
+  }
+};
+
+
+/*
+  RANGE BETWEEN ... AND CURRENT ROW, bottom frame bound for CURRENT ROW
+     ...
+   | peer1
+   | peer2  <----- current_row
+   | peer3 
+   +-peer4  <----- the cursor points here. peer4 itself is included.
+     nonpeer1
+     nonpeer2
+  
+  This bound moves in front of the current_row. It should be at the last row
+  that is still a peer of the current row.
+*/
+
+class Frame_range_current_row_bottom: public Frame_cursor
+{
+  Table_read_cursor cursor;
+  Group_bound_tracker peer_tracker;    // init()-ed with order_list
+
+  bool dont_move;                      // set in pre_next_row(), read in next_row()
+public:
+  void init(THD *thd, READ_RECORD *info,
+            SQL_I_List<ORDER> *partition_list,
+            SQL_I_List<ORDER> *order_list)
+  {                                    // partition_list is not used here
+    cursor.init(info);
+    peer_tracker.init(thd, order_list);
+  }
+
+  void pre_next_partition(longlong rownum, Item_sum* item)
+  {
+    // Save the value of the current_row
+    peer_tracker.check_if_next_group();  // result ignored here
+    if (rownum != 0)
+    {
+      // Add the current row now because our cursor has already seen it
+      item->add();
+    }
+  }
+
+  void next_partition(longlong rownum, Item_sum* item)
+  {
+    walk_till_non_peer(item);          // cursor is not repositioned to rownum
+  }
+
+  void pre_next_row(Item_sum* item)
+  {
+    dont_move= !peer_tracker.check_if_next_group();
+    if (!dont_move)                    // tracker reported a new group
+      item->add();
+  }
+
+  void next_row(Item_sum* item)
+  {
+    // Check if our cursor is pointing at a peer of the current row.
+    // If not, move forward until that becomes true
+    if (dont_move)
+    {
+      /*
+        pre_next_row() saw no new peer group for the current row.
+        No need to move the bound.
+      */
+      return;
+    }
+    walk_till_non_peer(item);
+  }
+
+private:
+  void walk_till_non_peer(Item_sum* item)
+  {
+    /*
+      Walk forward until we've met first row that's not a peer of the current
+      row
+    */
+    while (!cursor.get_next())         // also stops when get_next() fails
+    {
+      if (peer_tracker.compare_with_cache())
+        break;                         // row just read is not added
+      item->add();
+    }
+  }
+};
+
+
+/*
+  RANGE BETWEEN CURRENT ROW AND .... Top CURRENT ROW, RANGE-type frame bound
+
+      nonpeer1
+      nonpeer2
+    +-peer1  <----- the cursor points here. peer1 itself is included.
+    | peer2  
+    | peer3  <----- current_row
+    | peer4 
+      ... 
+
+  It moves behind the current_row. It is located right after the first peer of
+  the current_row.
+*/
+
+class Frame_range_current_row_top : public Frame_cursor
+{
+  Group_bound_tracker bound_tracker;   // only init()-ed; not consulted below
+
+  Table_read_cursor cursor;
+  Group_bound_tracker peer_tracker;    // init()-ed with order_list
+
+  bool move;                           // set in pre_next_row(), read in next_row()
+public:
+  void init(THD *thd, READ_RECORD *info,
+            SQL_I_List<ORDER> *partition_list,
+            SQL_I_List<ORDER> *order_list)
+  {
+    bound_tracker.init(thd, partition_list);
+
+    cursor.init(info);
+    peer_tracker.init(thd, order_list);
+  }
+
+  void pre_next_partition(longlong rownum, Item_sum* item)
+  {
+    // Fetch the value from the first row
+    peer_tracker.check_if_next_group();  // result ignored here
+    cursor.move_to(rownum+1);          // position just past row rownum
+  }
+
+  void next_partition(longlong rownum, Item_sum* item) {}  // nothing to do
+
+  void pre_next_row(Item_sum* item)
+  {
+    // Check if the new current_row is a peer of the row that our cursor is
+    // pointing to.
+    move= peer_tracker.check_if_next_group();
+  }
+
+  void next_row(Item_sum* item)
+  {
+    if (move)                          // otherwise the bound stays where it is
+    {
+      /*
+        Our cursor is pointing at the first row that was a peer of the previous
+        current row. Or, it was the first row in the partition.
+      */
+      if (cursor.restore_last_row())   // false if there is no such row
+      {
+        // todo: need the following check ?
+        if (!peer_tracker.compare_with_cache())
+          return;                      // that row is kept; nothing to remove
+        item->remove();
+      }
+
+      do                               // remove rows until one is kept
+      {
+        if (cursor.get_next())         // non-zero: stop walking
+          return;
+        if (!peer_tracker.compare_with_cache())
+          return;                      // row just read is not removed
+        item->remove();
+      }
+      while (1);
+    }
+  }
+};
+
+
+/////////////////////////////////////////////////////////////////////////////
+// UNBOUNDED frame bounds (shared between RANGE and ROWS)
+/////////////////////////////////////////////////////////////////////////////
+
+/*
+  UNBOUNDED PRECEDING frame bound
+*/
+class Frame_unbounded_preceding : public Frame_cursor
+{
+public:
+  /*
+    Nothing to do on a partition switch: the UNBOUNDED PRECEDING frame end
+    just stays on the first row. We are top of the frame, so we don't need to
+    update the sum function.
+  */
+  void next_partition(longlong rownum, Item_sum* item) {}
+
+  /* Nothing to do either: UNBOUNDED PRECEDING frame end doesn't move. */
+  void next_row(Item_sum* item) {}
+};
+
+/*
+  UNBOUNDED FOLLOWING frame bound
+*/
+
+class Frame_unbounded_following : public Frame_cursor
+{
+  /* Private read position used to scan the partition up to its end */
+  Table_read_cursor cursor;
+
+  /* Initialized over the PARTITION BY list; detects the partition end */
+  Group_bound_tracker bound_tracker;
+public:
+  /* order_list is not used by this bound. */
+  void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
+            SQL_I_List<ORDER> *order_list)
+  {
+    cursor.init(info);
+    bound_tracker.init(thd, partition_list);
+  }
+
+  void next_partition(longlong rownum, Item_sum* item)
+  {
+    /* At the very first row we still have to read it; give up on failure */
+    if (rownum == 0 && cursor.get_next())
+      return;
+
+    /* Remember which partition we are in, and account for its first row */
+    bound_tracker.check_if_next_group();
+    item->add();
+
+    /*
+      Walk to the end of the partition, updating the SUM function. The walk
+      ends when the cursor cannot read further or a new group is reported.
+    */
+    while (!cursor.get_next() && !bound_tracker.check_if_next_group())
+      item->add();
+  }
+
+  /* Do nothing, UNBOUNDED FOLLOWING frame end doesn't move */
+  void next_row(Item_sum* item) {}
+};
+
+
+/////////////////////////////////////////////////////////////////////////////
+// ROWS-type frame bounds
+/////////////////////////////////////////////////////////////////////////////
+/*
+  ROWS $n PRECEDING frame bound
+
+*/
+class Frame_n_rows_preceding : public Frame_cursor
+{
+  /* Whether this is top of the frame or bottom */
+  const bool is_top_bound;
+  const ha_rows n_rows;
+
+  /* Number of rows that we need to skip before our cursor starts moving */
+  ha_rows n_rows_to_skip;
+
+  Table_read_cursor cursor;
+public:
+  Frame_n_rows_preceding(bool is_top_bound_arg, ha_rows n_rows_arg) :
+    is_top_bound(is_top_bound_arg), n_rows(n_rows_arg)
+  {}
+
+  /* Only the READ_RECORD is needed; the other arguments are not used. */
+  void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
+            SQL_I_List<ORDER> *order_list)
+  {
+    cursor.init(info);
+  }
+
+  void next_partition(longlong rownum, Item_sum* item)
+  {
+    /*
+      Position our cursor to point at the first row in the new partition
+      (for rownum=0, it is already there, otherwise, it lags behind)
+    */
+    if (rownum != 0)
+      cursor.move_to(rownum);
+
+    /*
+      Suppose the bound is ROWS 2 PRECEDING, and current row is row#n:
+        ...
+        n-3
+        n-2 --- bound row
+        n-1
+         n  --- current_row
+        ...
+       The bound should point at row #(n-2). Bounds are inclusive, so
+        - bottom bound should add row #(n-2) into the window function
+        - top bound should remove row (#n-3) from the window function.
+
+      Hence a top bound waits n_rows rows before it starts moving and a
+      bottom bound waits one row less.
+    */
+    n_rows_to_skip= is_top_bound ? n_rows : n_rows - 1;
+  }
+
+  void next_row(Item_sum* item)
+  {
+    /* Still inside the initial lag: consume one skip and stay put */
+    if (n_rows_to_skip != 0)
+    {
+      n_rows_to_skip--;
+      return;
+    }
+
+    /* A failure to read here is not expected to happen. */
+    if (!cursor.get_next())
+    {
+      if (is_top_bound) // this is frame start endpoint
+        item->remove();
+      else
+        item->add();
+    }
+  }
+};
+
+
+/*
+  ROWS ... CURRENT ROW, Bottom bound.
+
+  This case is moved to separate class because here we don't need to maintain
+  our own cursor, or check for partition bound.
+*/
+
+class Frame_rows_current_row_bottom : public Frame_cursor
+{
+public:
+  /* The first row of a partition is added before the bounds are moved */
+  void pre_next_partition(longlong rownum, Item_sum* item) { item->add(); }
+
+  void next_partition(longlong rownum, Item_sum* item) {}
+
+  /* Temp table's current row is current_row. Add it to the window func */
+  void pre_next_row(Item_sum* item) { item->add(); }
+
+  void next_row(Item_sum* item) {}
+};
+
+
+/*
+  ROWS-type CURRENT ROW, top bound.
+
+  This serves for processing "ROWS BETWEEN CURRENT ROW AND ..." frames.
+
+      n-1
+       n  --+  --- current_row, and top frame bound
+      n+1   |
+      ...   |
+
+  when the current_row moves to row #n, this frame bound should remove the
+  row #(n-1) from the window function.
+
+  In other words, we need what "ROWS 0 PRECEDING" provides.
+*/
+class Frame_rows_current_row_top: public Frame_n_rows_preceding
+{
+public:
+  /* Behaves as a top bound that is zero rows behind the current row */
+  Frame_rows_current_row_top()
+    : Frame_n_rows_preceding(true /*top*/, 0 /* n_rows */)
+  {
+  }
+};
+
+
+/*
+  ROWS $n FOLLOWING frame bound.
+*/
+
+class Frame_n_rows_following : public Frame_cursor
+{
+  /* Whether this is top of the frame or bottom */
+  const bool is_top_bound;
+  const ha_rows n_rows;
+
+  Table_read_cursor cursor;
+  /* Set once our cursor cannot advance inside the current partition */
+  bool at_partition_end;
+
+  /*
+    This cursor reaches partition end before the main cursor has reached it.
+    bound_tracker is used to detect partition end.
+  */
+  Group_bound_tracker bound_tracker;
+public:
+  Frame_n_rows_following(bool is_top_bound_arg, ha_rows n_rows_arg) :
+    is_top_bound(is_top_bound_arg), n_rows(n_rows_arg)
+  {
+    DBUG_ASSERT(n_rows > 0);
+  }
+
+  /* order_list is not used by this bound. */
+  void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
+            SQL_I_List<ORDER> *order_list)
+  {
+    cursor.init(info);
+    at_partition_end= false;
+    bound_tracker.init(thd, partition_list);
+  }
+
+  void pre_next_partition(longlong rownum, Item_sum* item)
+  {
+    at_partition_end= false;
+
+    // Fetch current partition value
+    bound_tracker.check_if_next_group();
+
+    if (rownum == 0)
+      return;
+
+    // This is only needed for "FOLLOWING 1". It is one row behind
+    cursor.move_to(rownum+1);
+
+    // Current row points at the first row in the partition
+    apply_row(item);
+  }
+
+  /* Move our cursor to be n_rows ahead.  */
+  void next_partition(longlong rownum, Item_sum* item)
+  {
+    const int first_row_extra= (rownum == 0) ? 1 : 0;
+    const int top_bound_lag= is_top_bound ? 1 : 0;
+    const longlong steps_wanted= n_rows + first_row_extra - top_bound_lag;
+
+    /* Stop early as soon as a step reports the end of the partition */
+    longlong steps_done= 0;
+    while (steps_done < steps_wanted && !next_row_intern(item))
+      steps_done++;
+  }
+
+  void next_row(Item_sum* item)
+  {
+    if (!at_partition_end)
+      next_row_intern(item);
+  }
+
+private:
+  /* A top bound takes the row out of the function, a bottom bound adds it */
+  void apply_row(Item_sum *item)
+  {
+    if (is_top_bound) // this is frame start endpoint
+      item->remove();
+    else
+      item->add();
+  }
+
+  /*
+    Advance our cursor by one row. Returns the value of at_partition_end,
+    which is set when the cursor cannot read a row or when the row read
+    starts a new group.
+  */
+  bool next_row_intern(Item_sum *item)
+  {
+    if (cursor.get_next() || bound_tracker.check_if_next_group())
+    {
+      at_partition_end= true;
+      return at_partition_end;
+    }
+    apply_row(item);
+    return at_partition_end;
+  }
+};
+
+
+/*
+  Get a Frame_cursor for a frame bound. This is a "factory function".
+*/
+Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound)
+{
+  if (!frame)
+  {
+    /*
+      The docs say this about the lack of frame clause:
+
+        Let WD be a window structure descriptor.
+        ...
+        If WD has no window framing clause, then
+        Case:
+        i) If the window ordering clause of WD is not present, then WF is the
+           window partition of R.
+        ii) Otherwise, WF consists of all rows of the partition of R that
+           precede R or are peers of R in the window ordering of the window
+           partition defined by the window ordering clause.
+
+        For case #ii, the frame bounds essentially are "RANGE BETWEEN UNBOUNDED
+        PRECEDING AND CURRENT ROW".
+        For the case #i, without ordering clause all rows are considered peers,
+        so again the same frame bounds can be used.
+    */
+    if (is_top_bound)
+      return new Frame_unbounded_preceding;
+    return new Frame_range_current_row_bottom;
+  }
+
+  Window_frame_bound *bound= frame->bottom_bound;
+  if (is_top_bound)
+    bound= frame->top_bound;
+
+  const bool rows_units= (frame->units == Window_frame::UNITS_ROWS);
+
+  switch (bound->precedence_type)
+  {
+  case Window_frame_bound::PRECEDING:
+  case Window_frame_bound::FOLLOWING:
+  {
+    const bool is_preceding= (bound->precedence_type ==
+                              Window_frame_bound::PRECEDING);
+
+    /* No offset means UNBOUNDED; these serve both RANGE and ROWS */
+    if (bound->offset == NULL)
+    {
+      if (is_preceding)
+        return new Frame_unbounded_preceding;
+      return new Frame_unbounded_following;
+    }
+
+    /* RANGE-type bound with an offset */
+    if (!rows_units)
+    {
+      if (is_top_bound)
+        return new Frame_range_n_top(is_preceding, bound->offset);
+      return new Frame_range_n_bottom(is_preceding, bound->offset);
+    }
+
+    /* ROWS-type bound with an offset */
+    longlong n_rows= bound->offset->val_int();
+    /* These should be handled in the parser */
+    DBUG_ASSERT(!bound->offset->null_value);
+    DBUG_ASSERT(n_rows > 0);
+    if (is_preceding)
+      return new Frame_n_rows_preceding(is_top_bound, n_rows);
+    return new Frame_n_rows_following(is_top_bound, n_rows);
+  }
+  case Window_frame_bound::CURRENT:
+  {
+    if (rows_units)
+    {
+      if (is_top_bound)
+        return new Frame_rows_current_row_top;
+      return new Frame_rows_current_row_bottom;
+    }
+    if (is_top_bound)
+      return new Frame_range_current_row_top;
+    return new Frame_range_current_row_bottom;
+  }
+  }
+
+  /* Precedence type not recognized */
+  return NULL;
+}
+
+
+/*
+  Streamed window function computation with window frames.
+
+  We make a single pass over the ordered temp.table, but we're using three
+  cursors:
+   - current row  - the row that we're computing window func value for
+   - top_bound    - the start of the frame
+   - bottom_bound - the end of the frame
+   
+  All three cursors move together.
+
+  @todo
+    Provided bounds have their 'cursors'... is it better to re-clone their
+    cursors or re-position them onto the current row?
+
+  @detail
+    ROWS BETWEEN 3 PRECEDING  -- frame start
+              AND 3 FOLLOWING  -- frame end
+     
+                                    /------ frame end (aka BOTTOM)
+    Dataset start                   |
+     --------====*=======[*]========*========-------->> dataset end
+                 |        \  
+                 |         +-------- current row
+                 |
+                 \-------- frame start ("TOP")
+  
+    - frame_end moves forward and adds rows into the aggregate function.
+    - frame_start follows behind and removes rows from the aggregate function.
+    - current_row is the row where the value of aggregate function is stored.
+
+  @TODO:  Only the first cursor needs to check for run-out-of-partition
+  condition (Others can catch up by counting rows?)
+
+*/
+
+/*
+  Compute a frame-aware window function in one pass over the sorted table.
+
+  @param item_win  The window function item; provides the aggregate, the
+                   window specification and the field the result is saved to
+  @param tbl       The table being read and updated
+  @param info      Read structure used to fetch the rows in sorted order
+
+  @return false on success, true on error (a frame cursor or the rowid buffer
+          could not be allocated, the current row could not be re-read, or
+          the row update failed)
+*/
+bool compute_window_func_with_frames(Item_window_func *item_win,
+                                     TABLE *tbl, READ_RECORD *info)
+{
+  THD *thd= current_thd;
+  int err= 0;
+
+  Item_sum *sum_func= item_win->window_func();
+  /* This algorithm doesn't support DISTINCT aggregator */
+  sum_func->set_aggregator(Aggregator::SIMPLE_AGGREGATOR);
+
+  Window_frame *window_frame= item_win->window_spec->window_frame;
+  Frame_cursor *top_bound= get_frame_cursor(window_frame, true);
+  Frame_cursor *bottom_bound= get_frame_cursor(window_frame, false);
+
+  /*
+    get_frame_cursor() returns NULL for a bound it does not recognize (and
+    the allocation itself may fail). Do not dereference such a cursor.
+  */
+  if (!top_bound || !bottom_bound)
+  {
+    delete top_bound;
+    delete bottom_bound;
+    return true;
+  }
+
+  top_bound->init(thd, info, item_win->window_spec->partition_list,
+                  item_win->window_spec->order_list);
+  bottom_bound->init(thd, info, item_win->window_spec->partition_list,
+                     item_win->window_spec->order_list);
+
+  /* Buffer that keeps the position of current_row while the bounds move */
+  uchar *rowid_buf= (uchar*) my_malloc(tbl->file->ref_length, MYF(0));
+  if (!rowid_buf)
+  {
+    delete top_bound;
+    delete bottom_bound;
+    return true;
+  }
+
+  bool is_error= false;
+  longlong rownum= 0;
+
+  while (true)
+  {
+    /* Move the current_row */
+    if ((err=info->read_record(info)))
+    {
+      break; /* End of file */
+    }
+    bool partition_changed= item_win->check_if_partition_changed();
+
+    tbl->file->position(tbl->record[0]);
+    memcpy(rowid_buf, tbl->file->ref, tbl->file->ref_length);
+
+    if (partition_changed || (rownum == 0))
+    {
+      sum_func->clear();
+      /*
+        pre_XXX functions assume that tbl->record[0] contains current_row, and
+        they may not change it.
+      */
+      bottom_bound->pre_next_partition(rownum, sum_func);
+      top_bound->pre_next_partition(rownum, sum_func);
+      /*
+        We move bottom_bound first, because we want rows to be added into the
+        aggregate before top_bound attempts to remove them.
+      */
+      bottom_bound->next_partition(rownum, sum_func);
+      top_bound->next_partition(rownum, sum_func);
+    }
+    else
+    {
+      /* Again, both pre_XXX function can find current_row in tbl->record[0] */
+      bottom_bound->pre_next_row(sum_func);
+      top_bound->pre_next_row(sum_func);
+
+      /* These make no assumptions about tbl->record[0] and may change it */
+      bottom_bound->next_row(sum_func);
+      top_bound->next_row(sum_func);
+    }
+    rownum++;
+
+    /*
+      Frame cursors may have made tbl->record[0] to point to some record other
+      than current_row. This applies to tbl->file's internal state, too.
+      Fix this by reading the current row again. If that fails we must not
+      update the row: record[0] would not hold current_row.
+    */
+    if ((err= tbl->file->ha_rnd_pos(tbl->record[0], rowid_buf)))
+    {
+      is_error= true;
+      break;
+    }
+    store_record(tbl,record[1]);
+    item_win->save_in_field(item_win->result_field, true);
+    err= tbl->file->ha_update_row(tbl->record[1], tbl->record[0]);
+    if (err && err != HA_ERR_RECORD_IS_THE_SAME)
+    {
+      is_error= true;
+      break;
+    }
+  }
+
+  my_free(rowid_buf);
+  delete top_bound;
+  delete bottom_bound;
+  return is_error;
+}
+
+
+/*
+  Compute a window function that needs two passes over each partition.
+
+  @param item_win  The window function item; its function is expected to be
+                   an Item_sum_window_with_context
+  @param table     The table being read and updated
+  @param info      Read structure used for the first pass; a clone of it is
+                   used for the second pass
+
+  @return false on success, true on error. All resources allocated here are
+          released on every return path.
+*/
+bool compute_two_pass_window_functions(Item_window_func *item_win,
+                                       TABLE *table, READ_RECORD *info)
+{
+  /* Perform first pass. */
+
+  // TODO-cvicentiu why not initialize the record for when we need, _in_
+  // this function.
+  READ_RECORD *info2= new READ_RECORD();
+  if (!info2)
+    return true;
+  clone_read_record(info, info2);
+
+  uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0));
+  if (!rowid_buf)
+  {
+    delete info2;
+    return true;
+  }
+
+  Item_sum_window_with_context *window_func=
+    static_cast<Item_sum_window_with_context *>(item_win->window_func());
+
+  if (window_func->create_window_context())
+  {
+    /* Unable to allocate a new context. Release what we own so far. */
+    my_free(rowid_buf);
+    delete info2;
+    return true;
+  }
+
+  Window_context *context = window_func->get_window_context();
+  /*
+     The two pass algorithm is as follows:
+     We have a sorted table according to the partition and order by clauses.
+     1. Scan through the table till we reach a partition boundary.
+     2. For each row that we scan, add it to the context.
+     3. Once the partition boundary is met, do a second scan through the
+     current partition and use the context information to compute the value for
+     the window function for that partition.
+     4. Reset the context.
+     5. Repeat from 1 till end of table.
+  */
+
+  int err;
+  bool is_error = false;
+  bool first_row= true;
+  bool done = false;
+  longlong rows_in_current_partition = 0;
+  // TODO handle end of table updating.
+  while (!done)
+  {
+
+    if ((err= info->read_record(info)))
+    {
+      done = true;
+    }
+
+    bool partition_changed= done || item_win->check_if_partition_changed();
+    // The first time we always have a partition changed. Ignore it.
+    if (first_row)
+    {
+      partition_changed= false;
+      first_row= false;
+    }
+
+    if (partition_changed)
+    {
+      /*
+         We are now looking at the first row for the next partition, or at the
+         end of the table. Either way, we must remember this position for when
+         we finish doing the second pass.
+      */
+      table->file->position(table->record[0]);
+      memcpy(rowid_buf, table->file->ref, table->file->ref_length);
+
+      /* Second pass: re-read the rows of the partition that just ended */
+      for (longlong row_number = 0; row_number < rows_in_current_partition;
+          row_number++)
+      {
+        if ((err= info2->read_record(info2)))
+        {
+          is_error= true;
+          break;
+        }
+        window_func->add();
+        // Save the window function into the table.
+        // NOTE(review): record[1] is not refreshed with store_record() before
+        // this update, unlike in compute_window_func_with_frames() - confirm
+        // that this is intended.
+        item_win->save_in_field(item_win->result_field, true);
+        err= table->file->ha_update_row(table->record[1], table->record[0]);
+        if (err && err != HA_ERR_RECORD_IS_THE_SAME)
+        {
+          is_error= true;
+          break;
+        }
+      }
+
+      if (is_error)
+        break;
+
+      rows_in_current_partition= 0;
+      window_func->clear();
+      context->reset();
+
+      // Return to the beginning of the new partition.
+      table->file->ha_rnd_pos(table->record[0], rowid_buf);
+    }
+    rows_in_current_partition++;
+    context->add_field_to_context(item_win->result_field);
+  }
+
+  window_func->delete_window_context();
+  delete info2;
+  my_free(rowid_buf);
+  return is_error;
+}
+
+
+/*
+  Make a list that is a concatenation of two lists of ORDER elements.
+
+  Every element of list1 and then of list2 is copied into memory taken from
+  mem_root; the input lists are not modified. Returns the first element of
+  the new list, or NULL if both inputs are empty.
+*/
+
+static ORDER* concat_order_lists(MEM_ROOT *mem_root, ORDER *list1, ORDER *list2)
+{
+  /* With an empty first list, the second one takes its place */
+  if (!list1)
+  {
+    list1= list2;
+    list2= NULL;
+  }
+
+  ORDER *head= NULL;     // first element in the new list
+  ORDER **tail= &head;   // where the pointer to the next copy has to go
+
+  /* src_list goes through list1, then list2 */
+  for (ORDER *src_list= list1;
+       src_list;
+       src_list= (src_list == list1) ? list2 : NULL)
+  {
+    for (ORDER *src= src_list; src; src= src->next)
+    {
+      ORDER *copy= (ORDER*)alloc_root(mem_root, sizeof(ORDER));
+      memcpy(copy, src, sizeof(ORDER));
+      *tail= copy;
+      tail= &copy->next;
+    }
+  }
+
+  /* Terminate the new list (this also covers the empty result) */
+  *tail= NULL;
+
+  return head;
+}
+
+
+/*
+  Prepare the runner: build the sort order (PARTITION BY columns followed by
+  ORDER BY columns), create the Filesort object for it and choose the
+  computation function that matches the type of the window function.
+
+  @return false on success, true if an allocation failed or the window
+          function type has no computation function.
+*/
+bool Window_func_runner::setup(THD *thd)
+{
+  Window_spec *spec = win_func->window_spec;
+
+  ORDER* sort_order= concat_order_lists(thd->mem_root,
+                                        spec->partition_list->first,
+                                        spec->order_list->first);
+  filesort= new (thd->mem_root) Filesort(sort_order, HA_POS_ERROR, NULL);
+  if (!filesort)
+    return true;
+  filesort->tracker= new Filesort_tracker(thd->lex->analyze_stmt);
+  if (!filesort->tracker)
+    return true;
+
+  win_func->setup_partition_border_check(thd);
+
+  Item_sum::Sumfunctype type= win_func->window_func()->sum_func();
+  switch (type)
+  {
+    case Item_sum::ROW_NUMBER_FUNC:
+    case Item_sum::RANK_FUNC:
+    case Item_sum::DENSE_RANK_FUNC:
+    {
+      /*
+        One-pass window function computation, walk through the rows and
+        assign values.
+      */
+      compute_func= compute_window_func_values;
+      break;
+    }
+    case Item_sum::PERCENT_RANK_FUNC:
+    case Item_sum::CUME_DIST_FUNC:
+    {
+      compute_func= compute_two_pass_window_functions;
+      break;
+    }
+    case Item_sum::COUNT_FUNC:
+    case Item_sum::SUM_BIT_FUNC:
+    case Item_sum::SUM_FUNC:
+    case Item_sum::AVG_FUNC:
+    {
+      /*
+        Frame-aware window function computation. It does one pass, but
+        uses three cursors -frame_start, current_row, and frame_end.
+      */
+      compute_func= compute_window_func_with_frames;
+      break;
+    }
+    default:
+      /*
+        DBUG_ASSERT is compiled out of non-debug builds. Report failure there
+        instead of leaving compute_func unset for exec() to call.
+      */
+      DBUG_ASSERT(0);
+      return true;
+  }
+
+  return false;
+}
+
+
+/*
+  Compute the value of window function for all rows.
+*/
+/*
+  Sort the table and compute the value of the window function for all rows.
+
+  @return false on success, true on error. Once the sort has succeeded, the
+          sort result is released and the window function is switched back to
+          the retrieval phase on both the success and the error path.
+*/
+bool Window_func_runner::exec(JOIN *join)
+{
+  THD *thd= join->thd;
+  JOIN_TAB *join_tab= &join->join_tab[join->top_join_tab_count];
+
+  if (create_sort_index(thd, join, join_tab,
+                        filesort))
+    return true;
+
+  win_func->set_phase_to_computation();
+
+  /*
+    Go through the sorted array and compute the window function
+  */
+  READ_RECORD info;
+  TABLE *tbl= join_tab->table;
+
+  bool is_error= init_read_record(&info, thd, tbl, NULL/*select*/,
+                                  join_tab->filesort_result, 0, 1, FALSE);
+  if (!is_error)
+  {
+    is_error= compute_func(win_func, tbl, &info);
+
+    /* This calls filesort_free_buffers(): */
+    end_read_record(&info);
+  }
+
+  /* Done on failure as well, so that nothing is left behind half-way */
+  delete join_tab->filesort_result;
+  join_tab->filesort_result= NULL;
+  win_func->set_phase_to_retrieval();
+
+  return is_error;
+}
+
+
+/*
+  Create and set up one Window_func_runner per window function.
+
+  @return false on success, true if a runner could not be created, its
+          setup() failed, or it could not be appended to the list of runners.
+*/
+bool Window_funcs_computation::setup(THD *thd,
+                                     List<Item_window_func> *window_funcs)
+{
+  List_iterator_fast<Item_window_func> it(*window_funcs);
+  Item_window_func *item_win;
+  // for each window function
+  while ((item_win= it++))
+  {
+    // Create a runner and call setup for it
+    Window_func_runner *runner= new Window_func_runner(item_win);
+    if (!runner || runner->setup(thd))
+      return true;
+
+    // push_back() reports failure through its return value
+    if (win_func_runners.push_back(runner, thd->mem_root))
+      return true;
+  }
+  return false;
+}
+
+
+/*
+  Run every window function runner in list order. Stops at the first runner
+  that fails and returns true; returns false when all of them succeeded.
+*/
+bool Window_funcs_computation::exec(JOIN *join)
+{
+  List_iterator<Window_func_runner> it(win_func_runners);
+  for (Window_func_runner *runner= it++; runner; runner= it++)
+  {
+    if (runner->exec(join))
+      return true;
+  }
+  return false;
+}
+
+
+/*
+  Clean up and destroy all runners, then forget about them so that a
+  repeated call does not touch the already destroyed objects.
+*/
+void Window_funcs_computation::cleanup()
+{
+  List_iterator<Window_func_runner> it(win_func_runners);
+  Window_func_runner *runner;
+  while ((runner = it++))
+  {
+    runner->cleanup();
+    delete runner;
+  }
+  /* The list would otherwise keep pointers to the deleted runners */
+  win_func_runners.empty();
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// Unneeded comments (will be removed when we develop a replacement for
+//  the feature that was attempted here)
+/////////////////////////////////////////////////////////////////////////////
+  /*
+   TODO Get this code to set can_compute_window_function during preparation,
+   not during execution.
+
+   The reason for this is the following:
+   Our single scan optimization for window functions without tmp table,
+   is valid, if and only if, we only need to perform one sorting operation,
+   via filesort. The cases where we need to perform one sorting operation only:
+
+   * A select with only one window function.
+   * A select with multiple window functions, but they must have their
+     partition and order by clauses compatible. This means that one ordering
+     is acceptable for both window functions.
+
+       For example:
+       partition by a, b, c; order by d, e    results in sorting by a b c d e.
+       partition by a; order by d             results in sorting by a d.
+
+       This kind of sorting is compatible. The less specific partition does
+       not care for the order of b and c columns so it is valid if we sort
+       by those in case of equality over a.
+
+       partition by a, b; order by d, e      results in sorting by a b d e
+       partition by a; order by e            results in sorting by a e
+
+      This sorting is incompatible due to the order by clause. The partition by
+      clause is compatible, (partition by a) is a prefix for (partition by a, b)
+      However, order by e is not a prefix for order by d, e, thus it is not
+      compatible.
+
+    The rule for having compatible sorting is thus:
+      Each partition order must contain the other window functions partitions
+      prefixes, or be a prefix itself. This must hold true for all partitions.
+      Analog for the order by clause.  
+  */
+#if 0
+  List<Item_window_func> window_functions;
+  SQL_I_List<ORDER> largest_partition;
+  SQL_I_List<ORDER> largest_order_by;
+  bool can_compute_window_live = !need_tmp;
+  // Construct the window_functions item list and check if they can be
+  // computed using only one sorting.
+  //
+  // TODO: Perhaps group functions into compatible sorting bins
+  // to minimize the number of sorting passes required to compute all of them.
+  while ((item= it++))
+  {
+    if (item->type() == Item::WINDOW_FUNC_ITEM)
+    {
+      Item_window_func *item_win = (Item_window_func *) item;
+      window_functions.push_back(item_win);
+      if (!can_compute_window_live)
+        continue;  // No point checking  since we have to perform multiple sorts.
+      Window_spec *spec = item_win->window_spec;
+      // Having an empty partition list on one window function and a
+      // not empty list on a separate window function causes the sorting
+      // to be incompatible.
+      //
+      // Example:
+      // over (partition by a, order by x) && over (order by x).
+      //
+      // The first function requires an ordering by a first and then by x,
+      // while the second function requires an ordering by x first.
+      // The same restriction is not required for the order by clause.
+      if (largest_partition.elements && !spec->partition_list.elements)
+      {
+        can_compute_window_live= FALSE;
+        continue;
+      }
+      can_compute_window_live= test_if_order_compatible(largest_partition,
+                                                        spec->partition_list);
+      if (!can_compute_window_live)
+        continue;
+
+      can_compute_window_live= test_if_order_compatible(largest_order_by,
+                                                        spec->order_list);
+      if (!can_compute_window_live)
+        continue;
+
+      if (largest_partition.elements < spec->partition_list.elements)
+        largest_partition = spec->partition_list;
+      if (largest_order_by.elements < spec->order_list.elements)
+        largest_order_by = spec->order_list;
+    }
+  }
+  if (can_compute_window_live && window_functions.elements && table_count == 1)
+  {
+    ha_rows examined_rows = 0;
+    ha_rows found_rows = 0;
+    ha_rows filesort_retval;
+    SORT_FIELD *s_order= (SORT_FIELD *) my_malloc(sizeof(SORT_FIELD) *
+        (largest_partition.elements + largest_order_by.elements) + 1,
+        MYF(MY_WME | MY_ZEROFILL | MY_THREAD_SPECIFIC));
+
+    size_t pos= 0;
+    for (ORDER* curr = largest_partition.first; curr; curr=curr->next, pos++)
+      s_order[pos].item = *curr->item;
+
+    for (ORDER* curr = largest_order_by.first; curr; curr=curr->next, pos++)
+      s_order[pos].item = *curr->item;
+
+    table[0]->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE),
+                                               MYF(MY_WME | MY_ZEROFILL|
+                                                   MY_THREAD_SPECIFIC));
+
+
+    filesort_retval= filesort(thd, table[0], s_order,
+                              (largest_partition.elements + largest_order_by.elements),
+                              this->select, HA_POS_ERROR, FALSE,
+                              &examined_rows, &found_rows,
+                              this->explain->ops_tracker.report_sorting(thd));
+    table[0]->sort.found_records= filesort_retval;
+
+    join_tab->read_first_record = join_init_read_record;
+    join_tab->records= found_rows;
+
+    my_free(s_order);
+  }
+  else
+#endif
+
diff --git a/sql/sql_window.h b/sql/sql_window.h
new file mode 100644
index 00000000000..4c88a49b6b9
--- /dev/null
+++ b/sql/sql_window.h
@@ -0,0 +1,196 @@
+
+#ifndef SQL_WINDOW_INCLUDED
+#define SQL_WINDOW_INCLUDED
+
+#include "my_global.h"
+#include "item.h"
+#include "filesort.h"
+#include "records.h"
+
+class Item_window_func;
+
+/*
+  Window functions module. 
+  
+  Each instance of window function has its own element in SELECT_LEX::window_specs.
+*/
+
+
+class Window_frame_bound : public Sql_alloc
+{
+public:
+
+  /* Where the bound lies relative to the current row */
+  enum Bound_precedence_type
+  {
+    PRECEDING,
+    CURRENT,           // Used for CURRENT ROW window frame bounds
+    FOLLOWING
+  };
+
+  Bound_precedence_type precedence_type;
+
+  /*
+    Offset expression of the bound.
+    For UNBOUNDED PRECEDING / UNBOUNDED FOLLOWING window frame bounds the
+    precedence type is set to PRECEDING / FOLLOWING and offset is NULL.
+    The offset is not meaningful with precedence type CURRENT.
+  */
+  Item *offset;
+
+  Window_frame_bound(Bound_precedence_type prec_type, Item *offset_val) :
+    precedence_type(prec_type),
+    offset(offset_val)
+  {}
+
+  /* True when no offset expression is attached (see the offset member) */
+  bool is_unbounded() { return !offset; }
+};
+
+
+class Window_frame : public Sql_alloc
+{
+public:
+
+  /* Units the frame bounds are expressed in */
+  enum Frame_units
+  {
+    UNITS_ROWS,
+    UNITS_RANGE
+  };
+
+  enum Frame_exclusion
+  {
+    EXCL_NONE,
+    EXCL_CURRENT_ROW,
+    EXCL_GROUP,
+    EXCL_TIES
+  };
+
+  Frame_units units;
+  Window_frame_bound *top_bound;      // start of the frame
+  Window_frame_bound *bottom_bound;   // end of the frame
+  Frame_exclusion exclusion;
+
+  Window_frame(Frame_units win_frame_units,
+               Window_frame_bound *win_frame_top_bound,
+               Window_frame_bound *win_frame_bottom_bound,
+               Frame_exclusion win_frame_exclusion) :
+    units(win_frame_units),
+    top_bound(win_frame_top_bound),
+    bottom_bound(win_frame_bottom_bound),
+    exclusion(win_frame_exclusion)
+  {}
+
+  /* Defined outside of this header */
+  bool check_frame_bounds();
+};
+
+class Window_spec : public Sql_alloc
+{
+ public:
+
+  /* Reference to another window by name; may be NULL */
+  LEX_STRING *window_ref;
+
+  SQL_I_List<ORDER> *partition_list;
+
+  SQL_I_List<ORDER> *order_list;
+
+  Window_frame *window_frame;
+
+  /* Always NULL right after construction */
+  Window_spec *referenced_win_spec;
+
+  Window_spec(LEX_STRING *win_ref, SQL_I_List<ORDER> *part_list,
+              SQL_I_List<ORDER> *ord_list, Window_frame *win_frame) :
+    window_ref(win_ref), partition_list(part_list), order_list(ord_list),
+    window_frame(win_frame), referenced_win_spec(NULL)
+  {}
+
+  virtual char *name() { return NULL; }
+  bool check_window_names(List_iterator_fast<Window_spec> &it);
+  /* The str member of window_ref, or NULL when window_ref is NULL */
+  char *window_reference() { return !window_ref ? NULL : window_ref->str; }
+};
+
+class Window_def : public Window_spec
+{
+ public:
+
+  /* Name given to this window definition */
+  LEX_STRING *window_name;
+
+  Window_def(LEX_STRING *win_name, LEX_STRING *win_ref,
+             SQL_I_List<ORDER> *part_list, SQL_I_List<ORDER> *ord_list,
+             Window_frame *win_frame) :
+    Window_spec(win_ref, part_list, ord_list, win_frame),
+    window_name(win_name)
+  {}
+
+  /* Overrides Window_spec::name(); window_name is dereferenced unchecked */
+  char *name() { return window_name->str; }
+};
+
+int setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
+	          List<Item> &fields, List<Item> &all_fields, 
+                  List<Window_spec> &win_specs, List<Item_window_func> &win_funcs);
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Classes that make window functions computation a part of SELECT's query plan
+//////////////////////////////////////////////////////////////////////////////
+
+typedef bool (*window_compute_func_t)(Item_window_func *item_win,
+                                      TABLE *tbl, READ_RECORD *info);
+
+/*
+  This handles computation of one window function.
+
+  Currently, we make a separate filesort() call for each window function.
+*/
+
+class Window_func_runner : public Sql_alloc
+{
+  /* The window function this runner computes */
+  Item_window_func *win_func;
+  /* Window function can be computed over this sorting; NULL until setup() */
+  Filesort *filesort;
+  /* The function to use for computation; NULL until setup() picks one */
+  window_compute_func_t compute_func;
+
+public:
+  /*
+    filesort and compute_func start out as NULL so that cleanup() is safe
+    (delete of a NULL pointer) on a runner that setup() never filled in.
+  */
+  Window_func_runner(Item_window_func *win_func_arg) :
+    win_func(win_func_arg), filesort(NULL), compute_func(NULL)
+  {}
+
+  bool setup(THD *thd);   // Set things up. Create filesort structures, etc
+  bool exec(JOIN *join);  // This sorts and runs the window function.
+  void cleanup() { delete filesort; }
+};
+
+
+/*
+  This is a "window function computation phase": a single object of this class
+  takes care of computing all window functions in a SELECT.
+
+  - JOIN optimizer is expected to call setup() during query optimization.
+  - JOIN::exec() should call exec() once it has collected join output in a
+    temporary table.
+*/
+
+class Window_funcs_computation : public Sql_alloc
+{
+  List<Window_func_runner> win_func_runners;  // filled by setup()
+public:
+  bool setup(THD *thd, List<Item_window_func> *window_funcs);
+  bool exec(JOIN *join);
+  void cleanup();
+};
+
+
+#endif /* SQL_WINDOW_INCLUDED */
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 1870b3f719f..b8d0238b753 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -55,6 +55,8 @@
 #include "sql_signal.h"
 #include "sql_get_diagnostics.h"               // Sql_cmd_get_diagnostics
 #include "sql_cte.h"
+#include "sql_window.h"
+#include "item_windowfunc.h"
 #include "event_parse_data.h"
 #include "create_options.h"
 #include <myisam.h>
@@ -994,6 +996,8 @@ bool LEX::set_bincmp(CHARSET_INFO *cs, bool bin)
   handlerton *db_type;
   st_select_lex *select_lex;
   struct p_elem_val *p_elem_value;
+  class Window_frame *window_frame;
+  class Window_frame_bound *window_frame_bound;
   udf_func *udf;
 
   /* enums */
@@ -1019,6 +1023,9 @@ bool LEX::set_bincmp(CHARSET_INFO *cs, bool bin)
   enum sp_variable::enum_mode spvar_mode;
   enum thr_lock_type lock_type;
   enum enum_mysql_timestamp_type date_time_type;
+  enum Window_frame_bound::Bound_precedence_type bound_precedence_type;
+  enum Window_frame::Frame_units frame_units;
+  enum Window_frame::Frame_exclusion frame_exclusion;
   DDL_options_st object_ddl_options;
 }
 
@@ -1030,10 +1037,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %parse-param { THD *thd }
 %lex-param { THD *thd }
 /*
-  Currently there are 121 shift/reduce conflicts.
+  Currently there are 124 shift/reduce conflicts.
   We should not introduce new conflicts any more.
 */
-%expect 121
+%expect 124
 
 /*
    Comments for TOKENS.
@@ -1156,6 +1163,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  CREATE                        /* SQL-2003-R */
 %token  CROSS                         /* SQL-2003-R */
 %token  CUBE_SYM                      /* SQL-2003-R */
+%token  CUME_DIST_SYM
 %token  CURDATE                       /* MYSQL-FUNC */
 %token  CURRENT_SYM                   /* SQL-2003-R */
 %token  CURRENT_USER                  /* SQL-2003-R */
@@ -1186,6 +1194,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  DELAYED_SYM
 %token  DELAY_KEY_WRITE_SYM
 %token  DELETE_SYM                    /* SQL-2003-R */
+%token  DENSE_RANK_SYM
 %token  DESC                          /* SQL-2003-N */
 %token  DESCRIBE                      /* SQL-2003-R */
 %token  DES_KEY_FILE
@@ -1227,6 +1236,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  EVERY_SYM                     /* SQL-2003-N */
 %token  EXCHANGE_SYM
 %token  EXAMINED_SYM
+%token  EXCLUDE_SYM
 %token  EXECUTE_SYM                   /* SQL-2003-R */
 %token  EXISTS                        /* SQL-2003-R */
 %token  EXIT_SYM
@@ -1245,6 +1255,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  FLOAT_NUM
 %token  FLOAT_SYM                     /* SQL-2003-R */
 %token  FLUSH_SYM
+%token  FOLLOWING_SYM
 %token  FORCE_SYM
 %token  FOREIGN                       /* SQL-2003-R */
 %token  FOR_SYM                       /* SQL-2003-R */
@@ -1441,9 +1452,11 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  ORDER_SYM                     /* SQL-2003-R */
 %token  OR_OR_SYM                     /* OPERATOR */
 %token  OR_SYM                        /* SQL-2003-R */
+%token  OTHERS_SYM            
 %token  OUTER
 %token  OUTFILE
 %token  OUT_SYM                       /* SQL-2003-R */
+%token  OVER_SYM
 %token  OWNER_SYM
 %token  PACK_KEYS_SYM
 %token  PAGE_SYM
@@ -1456,6 +1469,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  PARTITIONS_SYM
 %token  PARTITIONING_SYM
 %token  PASSWORD_SYM
+%token  PERCENT_RANK_SYM
 %token  PERSISTENT_SYM
 %token  PHASE_SYM
 %token  PLUGINS_SYM
@@ -1464,6 +1478,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  POLYGON
 %token  PORT_SYM
 %token  POSITION_SYM                  /* SQL-2003-N */
+%token  PRECEDING_SYM
 %token  PRECISION                     /* SQL-2003-R */
 %token  PREPARE_SYM                   /* SQL-2003-R */
 %token  PRESERVE_SYM
@@ -1481,6 +1496,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  QUERY_SYM
 %token  QUICK
 %token  RANGE_SYM                     /* SQL-2003-R */
+%token  RANK_SYM        
 %token  READS_SYM                     /* SQL-2003-R */
 %token  READ_ONLY_SYM
 %token  READ_SYM                      /* SQL-2003-N */
@@ -1531,6 +1547,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  ROW_FORMAT_SYM
 %token  ROW_SYM                       /* SQL-2003-R */
 %token  ROW_COUNT_SYM                 /* SQL-2003-N */
+%token  ROW_NUMBER_SYM
 %token  RTREE_SYM
 %token  SAVEPOINT_SYM                 /* SQL-2003-R */
 %token  SCHEDULE_SYM
@@ -1621,6 +1638,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  TEXT_SYM
 %token  THAN_SYM
 %token  THEN_SYM                      /* SQL-2003-R */
+%token  TIES_SYM
 %token  TIMESTAMP                     /* SQL-2003-R */
 %token  TIMESTAMP_ADD
 %token  TIMESTAMP_DIFF
@@ -1641,6 +1659,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  TYPE_SYM                      /* SQL-2003-N */
 %token  UDF_RETURNS_SYM
 %token  ULONGLONG_NUM
+%token  UNBOUNDED_SYM
 %token  UNCOMMITTED_SYM               /* SQL-2003-N */
 %token  UNDEFINED_SYM
 %token  UNDERSCORE_CHARSET
@@ -1682,6 +1701,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
 %token  WEIGHT_STRING_SYM
 %token  WHEN_SYM                      /* SQL-2003-R */
 %token  WHERE                         /* SQL-2003-R */
+%token  WINDOW_SYM
 %token  WHILE_SYM
 %token  WITH                          /* SQL-2003-R */
 %token  WITH_CUBE_SYM                 /* INTERNAL */
@@ -1821,6 +1841,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
         simple_ident_nospvar simple_ident_q
         field_or_var limit_option
         part_func_expr
+        window_func_expr
+        window_func
+        simple_window_func
         function_call_keyword
         function_call_nonkeyword
         function_call_generic
@@ -2010,6 +2033,15 @@ END_OF_INPUT
 %type <cond_info_item_name> condition_information_item_name;
 %type <cond_info_list> condition_information;
 
+%type <NONE> opt_window_clause window_def_list window_def window_spec
+%type <lex_str_ptr> window_name
+%type <NONE> opt_window_ref opt_window_frame_clause
+%type <frame_units> window_frame_units;
+%type <NONE> window_frame_extent;
+%type <frame_exclusion> opt_window_frame_exclusion;
+%type <window_frame_bound> window_frame_start window_frame_bound;
+
+
 %type <NONE>
         '-' '+' '*' '/' '%' '(' ')'
         ',' '!' '{' '}' '&' '|' AND_SYM OR_SYM OR_OR_SYM BETWEEN_SYM CASE_SYM
@@ -4952,7 +4984,7 @@ opt_create_partitioning:
 /*
  This part of the parser is about handling of the partition information.
 
- It's first version was written by Mikael Ronström with lots of answers to
+ It's first version was written by Mikael Ronström with lots of answers to
  questions provided by Antony Curtis.
 
  The partition grammar can be called from three places.
@@ -7415,7 +7447,7 @@ alter_commands:
         | remove_partitioning
         | partitioning
 /*
-  This part was added for release 5.1 by Mikael Ronström.
+  This part was added for release 5.1 by Mikael Ronström.
   From here we insert a number of commands to manage the partitions of a
   partitioned table such as adding partitions, dropping partitions,
   reorganising partitions in various manners. In future releases the list
@@ -8537,19 +8569,20 @@ select_part2:
           opt_where_clause
           opt_group_clause
           opt_having_clause
+          opt_window_clause
           opt_order_clause
           opt_limit_clause
           opt_procedure_clause
           opt_into
           opt_select_lock_type
           {
-            if ($2 && $10)
+            if ($2 && $11)
             {
               /* double "INTO" clause */
               my_error(ER_WRONG_USAGE, MYF(0), "INTO", "INTO");
               MYSQL_YYABORT;
             }
-            if ($9 && ($2 || $10))
+            if ($10 && ($2 || $11))
             {
               /* "INTO" with "PROCEDURE ANALYSE" */
               my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "INTO");
@@ -8577,6 +8610,7 @@ table_expression:
           opt_where_clause
           opt_group_clause
           opt_having_clause
+          opt_window_clause
           opt_order_clause
           opt_limit_clause
           opt_procedure_clause
@@ -9301,6 +9335,7 @@ simple_expr:
         | param_marker { $$= $1; }
         | variable
         | sum_expr
+        | window_func_expr
         | simple_expr OR_OR_SYM simple_expr
           {
             $$= new (thd->mem_root) Item_func_concat(thd, $1, $3);
@@ -10436,6 +10471,85 @@ sum_expr:
           }
         ;
 
+window_func_expr:     /* <window function> OVER <window name or spec> */
+          window_func OVER_SYM window_name    /* refers to a window by name */
+          {
+            $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, $3);
+            if ($$ == NULL)                   /* allocation failed */
+              MYSQL_YYABORT;
+            if (Select->add_window_func((Item_window_func *) $$)) /* register */
+              MYSQL_YYABORT;
+          }
+        |
+          window_func OVER_SYM window_spec    /* window specified in place */
+          {
+            LEX *lex= Lex;
+            if (Select->add_window_spec(thd, lex->win_ref,  /* spec goes first */
+                                        Select->group_list,
+                                        Select->order_list,
+                                        lex->win_frame))
+              MYSQL_YYABORT;
+            $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1,
+                                                      thd->lex->win_spec); 
+            if ($$ == NULL)                   /* allocation failed */
+              MYSQL_YYABORT;
+            if (Select->add_window_func((Item_window_func *) $$)) /* register */
+              MYSQL_YYABORT;
+          }
+        ;
+
+window_func:
+          simple_window_func
+        |
+          sum_expr
+        ;
+
+simple_window_func:
+          ROW_NUMBER_SYM '(' ')'
+          {
+            $$= new (thd->mem_root) Item_sum_row_number(thd);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          RANK_SYM '(' ')'
+          {
+            $$= new (thd->mem_root) Item_sum_rank(thd);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          DENSE_RANK_SYM '(' ')'
+          {
+            $$= new (thd->mem_root) Item_sum_dense_rank(thd);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          PERCENT_RANK_SYM '(' ')'
+          {
+            $$= new (thd->mem_root) Item_sum_percent_rank(thd);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        |
+          CUME_DIST_SYM '(' ')'
+          {
+            $$= new (thd->mem_root) Item_sum_cume_dist(thd);
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        ;
+
+window_name:
+          ident
+          {
+            $$= (LEX_STRING *) thd->memdup(&$1, sizeof(LEX_STRING));
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        ;
+
 variable:
           '@'
           {
@@ -11371,6 +11485,155 @@ olap_opt:
         ;
 
 /*
+  optional window clause in select
+*/
+
+opt_window_clause:
+          /* empty */
+          {}
+        | WINDOW_SYM
+          window_def_list
+          {}
+        ;
+
+window_def_list:
+          window_def_list ',' window_def
+        | window_def
+        ;
+
+window_def:
+          window_name AS window_spec
+          { 
+            LEX *lex= Lex;
+            if (Select->add_window_def(thd, $1, lex->win_ref,
+                                       Select->group_list,
+                                       Select->order_list,
+                                       lex->win_frame ))
+              MYSQL_YYABORT;
+          }
+        ;
+
+window_spec:
+          '(' 
+          { Select->prepare_add_window_spec(thd); }
+          opt_window_ref opt_window_partition_clause
+          opt_window_order_clause opt_window_frame_clause
+          ')'
+        ;
+
+opt_window_ref:
+          /* empty */ {} 
+        | ident
+          {
+            thd->lex->win_ref= (LEX_STRING *) thd->memdup(&$1, sizeof(LEX_STRING));
+            if (thd->lex->win_ref == NULL)
+              MYSQL_YYABORT;
+          }
+
+opt_window_partition_clause:
+          /* empty */ { }
+        | PARTITION_SYM BY group_list
+        ;
+
+opt_window_order_clause:
+          /* empty */ { }
+        | ORDER_SYM BY order_list
+        ;
+
+opt_window_frame_clause:
+          /* empty */ {}
+        | window_frame_units window_frame_extent opt_window_frame_exclusion
+          {
+            LEX *lex= Lex;
+            lex->win_frame=
+              new (thd->mem_root) Window_frame($1,
+                                               lex->frame_top_bound,
+                                               lex->frame_bottom_bound,
+                                               $3);
+            if (lex->win_frame == NULL)
+              MYSQL_YYABORT;
+          }
+        ;
+
+window_frame_units:
+          ROWS_SYM { $$= Window_frame::UNITS_ROWS; }
+        | RANGE_SYM { $$= Window_frame::UNITS_RANGE; }
+        ;
+         
+window_frame_extent:  /* stores both frame bounds in LEX; yields no value */
+          window_frame_start                 /* only one bound was given */
+          {
+            LEX *lex= Lex;
+            lex->frame_top_bound= $1;
+            lex->frame_bottom_bound=         /* the other bound is CURRENT */
+              new (thd->mem_root)
+                Window_frame_bound(Window_frame_bound::CURRENT, NULL);
+            if (lex->frame_bottom_bound == NULL)   /* allocation failed */
+              MYSQL_YYABORT;
+          }
+        | BETWEEN_SYM window_frame_bound AND_SYM window_frame_bound
+          {
+            LEX *lex= Lex;
+            lex->frame_top_bound= $2;        /* bound before AND */
+            lex->frame_bottom_bound= $4;     /* bound after AND */
+          }
+        ;
+
+window_frame_start:   /* yields a newly allocated Window_frame_bound */
+          UNBOUNDED_SYM PRECEDING_SYM        /* PRECEDING bound, NULL argument */
+          {
+            $$= new (thd->mem_root) 
+                  Window_frame_bound(Window_frame_bound::PRECEDING, NULL); 
+            if ($$ == NULL)                  /* allocation failed */
+              MYSQL_YYABORT;
+          } 
+        | CURRENT_SYM ROW_SYM                /* CURRENT bound, NULL argument */
+          { 
+            $$= new (thd->mem_root)
+                  Window_frame_bound(Window_frame_bound::CURRENT, NULL); 
+            if ($$ == NULL)                  /* allocation failed */
+              MYSQL_YYABORT;
+          }
+        | literal PRECEDING_SYM              /* PRECEDING bound, literal passed */
+          {
+            $$= new (thd->mem_root)
+                  Window_frame_bound(Window_frame_bound::PRECEDING, $1); 
+            if ($$ == NULL)                  /* allocation failed */
+              MYSQL_YYABORT;
+          }
+        ;
+
+window_frame_bound:
+          window_frame_start { $$= $1; }
+        | UNBOUNDED_SYM FOLLOWING_SYM        
+          {
+            $$= new (thd->mem_root)
+                  Window_frame_bound(Window_frame_bound::FOLLOWING, NULL); 
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          } 
+        | literal FOLLOWING_SYM
+          {
+            $$= new (thd->mem_root)
+                  Window_frame_bound(Window_frame_bound::FOLLOWING, $1); 
+            if ($$ == NULL)
+              MYSQL_YYABORT;
+          }
+        ;
+
+opt_window_frame_exclusion:
+          /* empty */ { $$= Window_frame::EXCL_NONE; }
+        | EXCLUDE_SYM CURRENT_SYM ROW_SYM
+          { $$= Window_frame::EXCL_CURRENT_ROW; }
+        | EXCLUDE_SYM GROUP_SYM
+          { $$= Window_frame::EXCL_GROUP; }
+        | EXCLUDE_SYM TIES_SYM
+          { $$= Window_frame::EXCL_TIES; }
+        | EXCLUDE_SYM NO_SYM OTHERS_SYM
+          { $$= Window_frame::EXCL_NONE; }
+        ;      
+       
+/*
   Order by statement in ALTER TABLE
 */
 
diff --git a/sql/table.cc b/sql/table.cc
index 07e2876f5ba..dc1730b5b6f 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -5748,7 +5748,7 @@ void TABLE::mark_columns_used_by_index(uint index)
   MY_BITMAP *bitmap= &tmp_set;
   DBUG_ENTER("TABLE::mark_columns_used_by_index");
 
-  enable_keyread();
+  set_keyread(true);
   bitmap_clear_all(bitmap);
   mark_columns_used_by_index_no_reset(index, bitmap);
   column_bitmaps_set(bitmap, bitmap);
@@ -5769,7 +5769,7 @@ void TABLE::add_read_columns_used_by_index(uint index)
   MY_BITMAP *bitmap= &tmp_set;
   DBUG_ENTER("TABLE::add_read_columns_used_by_index");
 
-  enable_keyread();
+  set_keyread(true);
   bitmap_copy(bitmap, read_set);
   mark_columns_used_by_index_no_reset(index, bitmap);
   column_bitmaps_set(bitmap, write_set);
@@ -5792,7 +5792,7 @@ void TABLE::restore_column_maps_after_mark_index()
 {
   DBUG_ENTER("TABLE::restore_column_maps_after_mark_index");
 
-  disable_keyread();
+  set_keyread(false);
   default_column_bitmaps();
   file->column_bitmaps_signal();
   DBUG_VOID_RETURN;
diff --git a/sql/table.h b/sql/table.h
index 1c461d96097..aa7c11ba3d8 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -213,8 +213,13 @@ typedef struct st_order {
   Field  *fast_field_copier_setup;
   int    counter;                       /* position in SELECT list, correct
                                            only if counter_used is true*/
-  bool	 asc;				/* true if ascending */
-  bool	 free_me;			/* true if item isn't shared  */
+  enum enum_order {
+    ORDER_NOT_RELEVANT,
+    ORDER_ASC,
+    ORDER_DESC
+  };
+
+  enum_order direction;                 /* Requested direction of ordering */
   bool	 in_field_list;			/* true if in select field list */
   bool   counter_used;                  /* parameter was counter of columns */
   Field  *field;			/* If tmp-table group */
@@ -1239,7 +1244,9 @@ public:
   bool alias_name_used;              /* true if table_name is alias */
   bool get_fields_in_item_tree;      /* Signal to fix_field */
   bool m_needs_reopen;
+private:
   bool created;    /* For tmp tables. TRUE <=> tmp table was actually created.*/
+public:
 #ifdef HAVE_REPLICATION
   /* used in RBR Triggers */
   bool master_had_triggers;
@@ -1351,30 +1358,46 @@ public:
     map= map_arg;
     tablenr= tablenr_arg;
   }
-  inline void enable_keyread()
+
+  void set_keyread(bool flag)  /* Set/clear key_read; no-op if already so */
+  {
+    DBUG_ASSERT(file);
+    if (flag && !key_read)            /* switching on */
+    {
+      key_read= 1;
+      if (is_created())               /* else set_created() notifies later */
+        file->extra(HA_EXTRA_KEYREAD);
+    }
+    else if (!flag && key_read)       /* switching off */
+    {
+      key_read= 0;
+      if (is_created())               /* handler is told only if created */
+        file->extra(HA_EXTRA_NO_KEYREAD);
+    }
+  }
+
+  /// Return true if table is instantiated, and false otherwise.
+  bool is_created() const { return created; }
+
+  /**
+    Set the table as "created", and enable flags in storage engine
+    that could not be enabled without an instantiated table.
+  */
+  void set_created()
+  {
+    if (created)
+      return;
+    if (key_read)
+      file->extra(HA_EXTRA_KEYREAD);
+    created= true;
+  }
+
   /*
     Returns TRUE if the table is filled at execution phase (and so, the
     optimizer must not do anything that depends on the contents of the table,
     like range analysis or constant table detection)
   */
   bool is_filled_at_execution();
-  inline void disable_keyread()
-  {
-    DBUG_ENTER("disable_keyread");
-    if (key_read)
-    {
-      key_read= 0;
-      file->extra(HA_EXTRA_NO_KEYREAD);
-    }
-    DBUG_VOID_RETURN;
-  }
 
   bool update_const_key_parts(COND *conds);
 
@@ -2049,6 +2072,11 @@ struct TABLE_LIST
   /* TRUE <=> this table is a const one and was optimized away. */
   bool optimized_away;
 
+  /**
+    TRUE <=> already materialized. Valid only for materialized derived
+    tables/views.
+  */
+  bool materialized;
   /* I_S: Flags to open_table (e.g. OPEN_TABLE_ONLY or OPEN_VIEW_ONLY) */
   uint i_s_requested_object;